From nobody Mon May 6 21:27:55 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1641316636532336.47617749859205; Tue, 4 Jan 2022 09:17:16 -0800 (PST) Received: from localhost ([::1]:33664 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nQv-0006iK-Re for importer@patchew.org; Tue, 04 Jan 2022 12:17:14 -0500 Received: from eggs.gnu.org ([209.51.188.92]:55458) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1n4nOK-0003sx-4T for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:32 -0500 Received: from prt-mail.chinatelecom.cn ([42.123.76.226]:48530 helo=chinatelecom.cn) by eggs.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nOH-00008Q-Gv for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:31 -0500 Received: from clientip-125.69.40.99 (unknown [172.18.0.218]) by chinatelecom.cn (HERMES) with SMTP id 992752800A4; Wed, 5 Jan 2022 01:14:22 +0800 (CST) Received: from ([172.18.0.218]) by app0025 with ESMTP id 352a438951ed4e6c8ba8df68b989e403 for qemu-devel@nongnu.org; Wed, 05 Jan 2022 01:14:25 CST HMM_SOURCE_IP: 172.18.0.218:33794.643395762 HMM_ATTACHE_NUM: 0000 HMM_SOURCE_TYPE: SMTP X-189-SAVE-TO-SEND: +huangy81@chinatelecom.cn X-Transaction-ID: 352a438951ed4e6c8ba8df68b989e403 X-Real-From: huangy81@chinatelecom.cn X-Receive-IP: 172.18.0.218 X-MEDUSA-Status: 0 From: huangy81@chinatelecom.cn To: qemu-devel Subject: [PATCH v11 1/4] migration/dirtyrate: refactor dirty page rate calculation Date: Wed, 5 Jan 2022 01:14:06 +0800 Message-Id: <7cc032ae98e29471de57c00d3c0bd0fc5129ae23.1641316375.git.huangy81@chinatelecom.cn> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: 
References: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=42.123.76.226; envelope-from=huangy81@chinatelecom.cn; helo=chinatelecom.cn X-Spam_score_int: 0 X-Spam_score: -0.0 X-Spam_bar: / X-Spam_report: (-0.0 / 5.0 requ) SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=unavailable autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eduardo Habkost , David Hildenbrand , Hyman , Juan Quintela , Richard Henderson , Markus ArmBruster , Peter Xu , "Dr. David Alan Gilbert" , Paolo Bonzini , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1641316640069100005 From: Hyman Huang(=E9=BB=84=E5=8B=87) abstract out dirty log change logic into function global_dirty_log_change. abstract out dirty page rate calculation logic via dirty-ring into function vcpu_calculate_dirtyrate. abstract out mathematical dirty page rate calculation into do_calculate_dirtyrate, decouple it from DirtyStat. rename set_sample_page_period to dirty_stat_wait, which is well-understood and will be reused in dirtylimit. add cpu_list_lock to protect cpu list before walking through it in case of race against cpu hotplug/unplug. export util functions outside migration. 
Signed-off-by: Hyman Huang(=E9=BB=84=E5=8B=87) --- include/sysemu/dirtyrate.h | 29 ++++++ migration/dirtyrate.c | 220 ++++++++++++++++++++++++++++-------------= ---- migration/dirtyrate.h | 7 +- 3 files changed, 171 insertions(+), 85 deletions(-) create mode 100644 include/sysemu/dirtyrate.h diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h new file mode 100644 index 0000000..cb6f02b --- /dev/null +++ b/include/sysemu/dirtyrate.h @@ -0,0 +1,29 @@ +/* + * dirty page rate helper functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(=E9=BB=84=E5=8B=87) + * + * This work is licensed under the terms of the GNU GPL, version 2 or late= r. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_DIRTYRATE_H +#define QEMU_DIRTYRATE_H + +typedef struct VcpuStat { + int nvcpu; /* number of vcpu */ + DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ +} VcpuStat; + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + int64_t init_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot); + +void global_dirty_log_change(unsigned int flag, + bool start); +#endif diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index d65e744..1407455 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat; static DirtyRateMeasureMode dirtyrate_mode =3D DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING; =20 -static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) +static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time) { int64_t current_time; =20 @@ -60,6 +60,128 @@ static int64_t set_sample_page_period(int64_t msec, int= 64_t initial_time) return msec; } =20 +static inline void record_dirtypages(DirtyPageRecord *dirty_pages, + CPUState *cpu, bool start) +{ + if (start) { + dirty_pages[cpu->cpu_index].start_pages =3D cpu->dirty_pages; + } else { + dirty_pages[cpu->cpu_index].end_pages =3D cpu->dirty_pages; + } +} 
+ +static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, + int64_t calc_time_ms) +{ + uint64_t memory_size_MB; + uint64_t increased_dirty_pages =3D + dirty_pages.end_pages - dirty_pages.start_pages; + + memory_size_MB =3D (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; + + return memory_size_MB * 1000 / calc_time_ms; +} + +void global_dirty_log_change(unsigned int flag, bool start) +{ + qemu_mutex_lock_iothread(); + if (start) { + memory_global_dirty_log_start(flag); + } else { + memory_global_dirty_log_stop(flag); + } + qemu_mutex_unlock_iothread(); +} + +/* + * global_dirty_log_sync + * 1. sync dirty log from kvm + * 2. stop dirty tracking if needed. + */ +static void global_dirty_log_sync(unsigned int flag, bool one_shot) +{ + qemu_mutex_lock_iothread(); + memory_global_dirty_log_sync(); + if (one_shot) { + memory_global_dirty_log_stop(flag); + } + qemu_mutex_unlock_iothread(); +} + +static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat) +{ + CPUState *cpu; + DirtyPageRecord *records; + int nvcpu =3D 0; + + CPU_FOREACH(cpu) { + nvcpu++; + } + + stat->nvcpu =3D nvcpu; + stat->rates =3D g_malloc0(sizeof(DirtyRateVcpu) * nvcpu); + + records =3D g_malloc0(sizeof(DirtyPageRecord) * nvcpu); + + return records; +} + +static void vcpu_dirty_stat_collect(VcpuStat *stat, + DirtyPageRecord *records, + bool start) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (!start && cpu->cpu_index >=3D stat->nvcpu) { + /* + * Never go there unless cpu is hot-plugged, + * just ignore in this case. 
+ */ + continue; + } + record_dirtypages(records, cpu, start); + } +} + +int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms, + int64_t init_time_ms, + VcpuStat *stat, + unsigned int flag, + bool one_shot) +{ + DirtyPageRecord *records; + int64_t duration; + int64_t dirtyrate; + int i =3D 0; + + cpu_list_lock(); + records =3D vcpu_dirty_stat_alloc(stat); + vcpu_dirty_stat_collect(stat, records, true); + cpu_list_unlock(); + + duration =3D dirty_stat_wait(calc_time_ms, init_time_ms); + + global_dirty_log_sync(flag, one_shot); + + cpu_list_lock(); + vcpu_dirty_stat_collect(stat, records, false); + cpu_list_unlock(); + + for (i =3D 0; i < stat->nvcpu; i++) { + dirtyrate =3D do_calculate_dirtyrate(records[i], duration); + + stat->rates[i].id =3D i; + stat->rates[i].dirty_rate =3D dirtyrate; + + trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); + } + + g_free(records); + + return duration; +} + static bool is_sample_period_valid(int64_t sec) { if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || @@ -396,44 +518,6 @@ static bool compare_page_hash_info(struct RamblockDirt= yInfo *info, return true; } =20 -static inline void record_dirtypages(DirtyPageRecord *dirty_pages, - CPUState *cpu, bool start) -{ - if (start) { - dirty_pages[cpu->cpu_index].start_pages =3D cpu->dirty_pages; - } else { - dirty_pages[cpu->cpu_index].end_pages =3D cpu->dirty_pages; - } -} - -static void dirtyrate_global_dirty_log_start(void) -{ - qemu_mutex_lock_iothread(); - memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); - qemu_mutex_unlock_iothread(); -} - -static void dirtyrate_global_dirty_log_stop(void) -{ - qemu_mutex_lock_iothread(); - memory_global_dirty_log_sync(); - memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE); - qemu_mutex_unlock_iothread(); -} - -static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages) -{ - uint64_t memory_size_MB; - int64_t time_s; - uint64_t increased_dirty_pages =3D - dirty_pages.end_pages - dirty_pages.start_pages; - - memory_size_MB =3D 
(increased_dirty_pages * TARGET_PAGE_SIZE) >> 20; - time_s =3D DirtyStat.calc_time; - - return memory_size_MB / time_s; -} - static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages, bool start) { @@ -444,11 +528,6 @@ static inline void record_dirtypages_bitmap(DirtyPageR= ecord *dirty_pages, } } =20 -static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages) -{ - DirtyStat.dirty_rate =3D do_calculate_dirtyrate_vcpu(dirty_pages); -} - static inline void dirtyrate_manual_reset_protect(void) { RAMBlock *block =3D NULL; @@ -492,71 +571,52 @@ static void calculate_dirtyrate_dirty_bitmap(struct D= irtyRateConfig config) DirtyStat.start_time =3D start_time / 1000; =20 msec =3D config.sample_period_seconds * 1000; - msec =3D set_sample_page_period(msec, start_time); + msec =3D dirty_stat_wait(msec, start_time); DirtyStat.calc_time =3D msec / 1000; =20 /* - * dirtyrate_global_dirty_log_stop do two things. + * do two things. * 1. fetch dirty bitmap from kvm * 2. stop dirty tracking */ - dirtyrate_global_dirty_log_stop(); + global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true); =20 record_dirtypages_bitmap(&dirty_pages, false); =20 - do_calculate_dirtyrate_bitmap(dirty_pages); + DirtyStat.dirty_rate =3D do_calculate_dirtyrate(dirty_pages, msec); } =20 static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config) { - CPUState *cpu; - int64_t msec =3D 0; int64_t start_time; + int64_t duration; uint64_t dirtyrate =3D 0; uint64_t dirtyrate_sum =3D 0; - DirtyPageRecord *dirty_pages; - int nvcpu =3D 0; int i =3D 0; =20 - CPU_FOREACH(cpu) { - nvcpu++; - } - - dirty_pages =3D malloc(sizeof(*dirty_pages) * nvcpu); - - DirtyStat.dirty_ring.nvcpu =3D nvcpu; - DirtyStat.dirty_ring.rates =3D malloc(sizeof(DirtyRateVcpu) * nvcpu); - - dirtyrate_global_dirty_log_start(); - - CPU_FOREACH(cpu) { - record_dirtypages(dirty_pages, cpu, true); - } + /* start log sync */ + global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true); =20 start_time =3D 
qemu_clock_get_ms(QEMU_CLOCK_REALTIME); DirtyStat.start_time =3D start_time / 1000; =20 - msec =3D config.sample_period_seconds * 1000; - msec =3D set_sample_page_period(msec, start_time); - DirtyStat.calc_time =3D msec / 1000; + /* calculate vcpu dirtyrate */ + duration =3D vcpu_calculate_dirtyrate(config.sample_period_seconds * 1= 000, + start_time, + &DirtyStat.dirty_ring, + GLOBAL_DIRTY_DIRTY_RATE, + true); =20 - dirtyrate_global_dirty_log_stop(); - - CPU_FOREACH(cpu) { - record_dirtypages(dirty_pages, cpu, false); - } + DirtyStat.calc_time =3D duration / 1000; =20 + /* calculate vm dirtyrate */ for (i =3D 0; i < DirtyStat.dirty_ring.nvcpu; i++) { - dirtyrate =3D do_calculate_dirtyrate_vcpu(dirty_pages[i]); - trace_dirtyrate_do_calculate_vcpu(i, dirtyrate); - - DirtyStat.dirty_ring.rates[i].id =3D i; + dirtyrate =3D DirtyStat.dirty_ring.rates[i].dirty_rate; DirtyStat.dirty_ring.rates[i].dirty_rate =3D dirtyrate; dirtyrate_sum +=3D dirtyrate; } =20 DirtyStat.dirty_rate =3D dirtyrate_sum; - free(dirty_pages); } =20 static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config) @@ -574,7 +634,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyR= ateConfig config) rcu_read_unlock(); =20 msec =3D config.sample_period_seconds * 1000; - msec =3D set_sample_page_period(msec, initial_time); + msec =3D dirty_stat_wait(msec, initial_time); DirtyStat.start_time =3D initial_time / 1000; DirtyStat.calc_time =3D msec / 1000; =20 diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 69d4c5b..594a5c0 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -13,6 +13,8 @@ #ifndef QEMU_MIGRATION_DIRTYRATE_H #define QEMU_MIGRATION_DIRTYRATE_H =20 +#include "sysemu/dirtyrate.h" + /* * Sample 512 pages per GB as default. 
*/ @@ -65,11 +67,6 @@ typedef struct SampleVMStat { uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ } SampleVMStat; =20 -typedef struct VcpuStat { - int nvcpu; /* number of vcpu */ - DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */ -} VcpuStat; - /* * Store calculation statistics for each measure. */ --=20 1.8.3.1 From nobody Mon May 6 21:27:55 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1641316636391540.6038594793532; Tue, 4 Jan 2022 09:17:16 -0800 (PST) Received: from localhost ([::1]:33586 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nQv-0006fP-Rt for importer@patchew.org; Tue, 04 Jan 2022 12:17:14 -0500 Received: from eggs.gnu.org ([209.51.188.92]:55460) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1n4nOK-0003tt-Jp for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:32 -0500 Received: from prt-mail.chinatelecom.cn ([42.123.76.226]:48541 helo=chinatelecom.cn) by eggs.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nOI-00008a-Ap for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:32 -0500 Received: from clientip-125.69.40.99 (unknown [172.18.0.218]) by chinatelecom.cn (HERMES) with SMTP id 03219280097; Wed, 5 Jan 2022 01:14:25 +0800 (CST) Received: from ([172.18.0.218]) by app0025 with ESMTP id 8e31393c386c4e3b8304f0621c95ddbe for qemu-devel@nongnu.org; Wed, 05 Jan 2022 01:14:28 CST HMM_SOURCE_IP: 172.18.0.218:33794.643395762 HMM_ATTACHE_NUM: 0000 HMM_SOURCE_TYPE: SMTP X-189-SAVE-TO-SEND: +huangy81@chinatelecom.cn X-Transaction-ID: 8e31393c386c4e3b8304f0621c95ddbe X-Real-From: huangy81@chinatelecom.cn X-Receive-IP: 172.18.0.218 
X-MEDUSA-Status: 0 From: huangy81@chinatelecom.cn To: qemu-devel Subject: [PATCH v11 2/4] softmmu/dirtylimit: implement vCPU dirtyrate calculation periodically Date: Wed, 5 Jan 2022 01:14:07 +0800 Message-Id: <3f7e224b0f367ec64a21929d110b18ded2ff6d22.1641316375.git.huangy81@chinatelecom.cn> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=42.123.76.226; envelope-from=huangy81@chinatelecom.cn; helo=chinatelecom.cn X-Spam_score_int: 0 X-Spam_score: -0.0 X-Spam_bar: / X-Spam_report: (-0.0 / 5.0 requ) SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=unavailable autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eduardo Habkost , David Hildenbrand , Hyman , Juan Quintela , Richard Henderson , Markus ArmBruster , Peter Xu , "Dr. David Alan Gilbert" , Paolo Bonzini , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1641316640003100001 From: Hyman Huang(=E9=BB=84=E5=8B=87) Introduce the third method GLOBAL_DIRTY_LIMIT of dirty tracking to calculate dirtyrate periodically for dirty page rate limit. Add dirtylimit.c to implement dirtyrate calculation periodically, which will be used for dirty page rate limit. Add dirtylimit.h to export util functions for dirty page rate limit implementation. 
Signed-off-by: Hyman Huang(=E9=BB=84=E5=8B=87) Reviewed-by: Peter Xu --- include/exec/memory.h | 5 +- include/sysemu/dirtylimit.h | 22 ++++++++ softmmu/dirtylimit.c | 120 ++++++++++++++++++++++++++++++++++++++++= ++++ softmmu/meson.build | 1 + 4 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 include/sysemu/dirtylimit.h create mode 100644 softmmu/dirtylimit.c diff --git a/include/exec/memory.h b/include/exec/memory.h index 20f1b27..606bec8 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -69,7 +69,10 @@ static inline void fuzz_dma_read_cb(size_t addr, /* Dirty tracking enabled because measuring dirty rate */ #define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) =20 -#define GLOBAL_DIRTY_MASK (0x3) +/* Dirty tracking enabled because dirty limit */ +#define GLOBAL_DIRTY_LIMIT (1U << 2) + +#define GLOBAL_DIRTY_MASK (0x7) =20 extern unsigned int global_dirty_tracking; =20 diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h new file mode 100644 index 0000000..da459f0 --- /dev/null +++ b/include/sysemu/dirtylimit.h @@ -0,0 +1,22 @@ +/* + * Dirty page rate limit common functions + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(=E9=BB=84=E5=8B=87) + * + * This work is licensed under the terms of the GNU GPL, version 2 or late= r. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_DIRTYRLIMIT_H +#define QEMU_DIRTYRLIMIT_H + +#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */ + +int64_t vcpu_dirty_rate_get(int cpu_index); +void vcpu_dirty_rate_stat_start(void); +void vcpu_dirty_rate_stat_stop(void); +void vcpu_dirty_rate_stat_initialize(void); +void vcpu_dirty_rate_stat_finalize(void); +#endif diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c new file mode 100644 index 0000000..a10ac6f --- /dev/null +++ b/softmmu/dirtylimit.c @@ -0,0 +1,120 @@ +/* + * Dirty page rate limit implementation code + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 
+ * + * Authors: + * Hyman Huang(=E9=BB=84=E5=8B=87) + * + * This work is licensed under the terms of the GNU GPL, version 2 or late= r. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/main-loop.h" +#include "qapi/qapi-commands-migration.h" +#include "sysemu/dirtyrate.h" +#include "sysemu/dirtylimit.h" +#include "exec/memory.h" +#include "hw/boards.h" + +struct { + VcpuStat stat; + bool running; + QemuThread thread; +} *vcpu_dirty_rate_stat; + +static void vcpu_dirty_rate_stat_collect(void) +{ + int64_t start_time; + VcpuStat stat; + int i =3D 0; + + start_time =3D qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + /* calculate vcpu dirtyrate */ + vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, + start_time, + &stat, + GLOBAL_DIRTY_LIMIT, + false); + + for (i =3D 0; i < stat.nvcpu; i++) { + vcpu_dirty_rate_stat->stat.rates[i].id =3D i; + vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =3D + stat.rates[i].dirty_rate; + } + + free(stat.rates); +} + +static void *vcpu_dirty_rate_stat_thread(void *opaque) +{ + rcu_register_thread(); + + /* start log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); + + while (qatomic_read(&vcpu_dirty_rate_stat->running)) { + vcpu_dirty_rate_stat_collect(); + } + + /* stop log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); + + rcu_unregister_thread(); + return NULL; +} + +int64_t vcpu_dirty_rate_get(int cpu_index) +{ + DirtyRateVcpu *rates =3D vcpu_dirty_rate_stat->stat.rates; + return qatomic_read(&rates[cpu_index].dirty_rate); +} + +void vcpu_dirty_rate_stat_start(void) +{ + if (qatomic_read(&vcpu_dirty_rate_stat->running)) { + return; + } + + qatomic_set(&vcpu_dirty_rate_stat->running, 1); + qemu_thread_create(&vcpu_dirty_rate_stat->thread, + "dirtyrate-stat", + vcpu_dirty_rate_stat_thread, + NULL, + QEMU_THREAD_JOINABLE); +} + +void vcpu_dirty_rate_stat_stop(void) +{ + qatomic_set(&vcpu_dirty_rate_stat->running, 0); + 
qemu_mutex_unlock_iothread(); + qemu_thread_join(&vcpu_dirty_rate_stat->thread); + qemu_mutex_lock_iothread(); +} + +void vcpu_dirty_rate_stat_initialize(void) +{ + MachineState *ms =3D MACHINE(qdev_get_machine()); + int max_cpus =3D ms->smp.max_cpus; + + vcpu_dirty_rate_stat =3D + g_malloc0(sizeof(*vcpu_dirty_rate_stat)); + + vcpu_dirty_rate_stat->stat.nvcpu =3D max_cpus; + vcpu_dirty_rate_stat->stat.rates =3D + g_malloc0(sizeof(DirtyRateVcpu) * max_cpus); + + vcpu_dirty_rate_stat->running =3D false; +} + +void vcpu_dirty_rate_stat_finalize(void) +{ + free(vcpu_dirty_rate_stat->stat.rates); + vcpu_dirty_rate_stat->stat.rates =3D NULL; + + free(vcpu_dirty_rate_stat); + vcpu_dirty_rate_stat =3D NULL; +} diff --git a/softmmu/meson.build b/softmmu/meson.build index d8e0301..95029a5 100644 --- a/softmmu/meson.build +++ b/softmmu/meson.build @@ -15,6 +15,7 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files( 'vl.c', 'cpu-timers.c', 'runstate-action.c', + 'dirtylimit.c', )]) =20 specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files( --=20 1.8.3.1 From nobody Mon May 6 21:27:55 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1641316719876250.30805044548515; Tue, 4 Jan 2022 09:18:39 -0800 (PST) Received: from localhost ([::1]:39146 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nSI-0001w1-Sd for importer@patchew.org; Tue, 04 Jan 2022 12:18:38 -0500 Received: from eggs.gnu.org ([209.51.188.92]:55492) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1n4nOQ-000452-0A for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:38 -0500 Received: from prt-mail.chinatelecom.cn 
([42.123.76.226]:48554 helo=chinatelecom.cn) by eggs.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nOM-00009I-4C for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:36 -0500 Received: from clientip-125.69.40.99 (unknown [172.18.0.218]) by chinatelecom.cn (HERMES) with SMTP id D184F280099; Wed, 5 Jan 2022 01:14:29 +0800 (CST) Received: from ([172.18.0.218]) by app0025 with ESMTP id f3d0e93427eb426d843a02d9d93ba84a for qemu-devel@nongnu.org; Wed, 05 Jan 2022 01:14:32 CST HMM_SOURCE_IP: 172.18.0.218:33794.643395762 HMM_ATTACHE_NUM: 0000 HMM_SOURCE_TYPE: SMTP X-189-SAVE-TO-SEND: +huangy81@chinatelecom.cn X-Transaction-ID: f3d0e93427eb426d843a02d9d93ba84a X-Real-From: huangy81@chinatelecom.cn X-Receive-IP: 172.18.0.218 X-MEDUSA-Status: 0 From: huangy81@chinatelecom.cn To: qemu-devel Subject: [PATCH v11 3/4] softmmu/dirtylimit: implement virtual CPU throttle Date: Wed, 5 Jan 2022 01:14:08 +0800 Message-Id: <0381e32c2cc70613613aaa284b8e8c9760d6932f.1641316375.git.huangy81@chinatelecom.cn> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=42.123.76.226; envelope-from=huangy81@chinatelecom.cn; helo=chinatelecom.cn X-Spam_score_int: 0 X-Spam_score: -0.0 X-Spam_bar: / X-Spam_report: (-0.0 / 5.0 requ) SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=unavailable autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eduardo Habkost , David Hildenbrand , Hyman , Juan Quintela , Richard Henderson , Markus ArmBruster , Peter Xu , "Dr. 
David Alan Gilbert" , Paolo Bonzini , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1641316721476100003 From: Hyman Huang(=E9=BB=84=E5=8B=87) Set up a negative feedback system for when the vCPU thread handles a KVM_EXIT_DIRTY_RING_FULL exit by introducing the throttle_us_per_full field in struct CPUState. Sleep throttle_us_per_full microseconds to throttle the vCPU if dirtylimit is enabled. Start a thread to track current dirty page rates and tune throttle_us_per_full dynamically until the current dirty page rate reaches the quota. Introduce the util function in the header for dirtylimit implementation. Signed-off-by: Hyman Huang(=E9=BB=84=E5=8B=87) --- accel/kvm/kvm-all.c | 8 + include/hw/core/cpu.h | 6 + include/sysemu/dirtylimit.h | 12 ++ include/sysemu/kvm.h | 2 + qapi/migration.json | 19 +++ softmmu/dirtylimit.c | 357 ++++++++++++++++++++++++++++++++++++++++= ++++ softmmu/trace-events | 8 + 7 files changed, 412 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 0e66ebb..908d954 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -45,6 +45,7 @@ #include "qemu/guest-random.h" #include "sysemu/hw_accel.h" #include "kvm-cpus.h" +#include "sysemu/dirtylimit.h" =20 #include "hw/boards.h" =20 @@ -476,6 +477,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->kvm_state =3D s; cpu->vcpu_dirty =3D true; cpu->dirty_pages =3D 0; + cpu->throttle_us_per_full =3D 0; =20 mmap_size =3D kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -2309,6 +2311,11 @@ bool kvm_dirty_ring_enabled(void) return kvm_state->kvm_dirty_ring_size ? 
true : false; } =20 +uint32_t kvm_dirty_ring_size(void) +{ + return kvm_state->kvm_dirty_ring_size; +} + static int kvm_init(MachineState *ms) { MachineClass *mc =3D MACHINE_GET_CLASS(ms); @@ -2958,6 +2965,7 @@ int kvm_cpu_exec(CPUState *cpu) qemu_mutex_lock_iothread(); kvm_dirty_ring_reap(kvm_state); qemu_mutex_unlock_iothread(); + dirtylimit_vcpu_execute(cpu); ret =3D 0; break; case KVM_EXIT_SYSTEM_EVENT: diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index e948e81..9631c1e 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -411,6 +411,12 @@ struct CPUState { */ bool throttle_thread_scheduled; =20 + /* + * Sleep throttle_us_per_full microseconds once dirty ring is full + * if dirty page rate limit is enabled. + */ + int64_t throttle_us_per_full; + bool ignore_memory_transaction_failures; =20 struct hax_vcpu_state *hax_vcpu; diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h index da459f0..6eadd16 100644 --- a/include/sysemu/dirtylimit.h +++ b/include/sysemu/dirtylimit.h @@ -19,4 +19,16 @@ void vcpu_dirty_rate_stat_start(void); void vcpu_dirty_rate_stat_stop(void); void vcpu_dirty_rate_stat_initialize(void); void vcpu_dirty_rate_stat_finalize(void); + +void dirtylimit_state_initialize(void); +void dirtylimit_state_finalize(void); +void dirtylimit_thread_finalize(void); +bool dirtylimit_in_service(void); +bool dirtylimit_vcpu_index_valid(int cpu_index); +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable); +void dirtylimit_set_all(uint64_t quota, + bool enable); +void dirtylimit_vcpu_execute(CPUState *cpu); #endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6eb39a0..bc3f0b5 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -563,4 +563,6 @@ bool kvm_cpu_check_are_resettable(void); bool kvm_arch_cpu_check_are_resettable(void); =20 bool kvm_dirty_ring_enabled(void); + +uint32_t kvm_dirty_ring_size(void); #endif diff --git a/qapi/migration.json b/qapi/migration.json 
index bbfd48c..ac5fa56 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1850,6 +1850,25 @@ { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } =20 ## +# @DirtyLimitInfo: +# +# Dirty page rate limit information of virtual CPU. +# +# @cpu-index: index of virtual CPU. +# +# @limit-rate: upper limit of dirty page rate for virtual CPU. +# +# @current-rate: current dirty page rate for virtual CPU. +# +# Since: 7.0 +# +## +{ 'struct': 'DirtyLimitInfo', + 'data': { 'cpu-index': 'int', + 'limit-rate': 'int64', + 'current-rate': 'int64' } } + +## # @snapshot-save: # # Save a VM snapshot diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c index a10ac6f..c9f5745 100644 --- a/softmmu/dirtylimit.c +++ b/softmmu/dirtylimit.c @@ -18,6 +18,26 @@ #include "sysemu/dirtylimit.h" #include "exec/memory.h" #include "hw/boards.h" +#include "sysemu/kvm.h" +#include "trace.h" + +/* + * Dirtylimit stop working if dirty page rate error + * value less than DIRTYLIMIT_TOLERANCE_RANGE + */ +#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ +/* + * Plus or minus vcpu sleep time linearly if dirty + * page rate error value percentage over + * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. + * Otherwise, plus or minus a fixed vcpu sleep time. + */ +#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 +/* + * Max vcpu sleep time percentage during a cycle + * composed of dirty ring full and sleep time. + */ +#define DIRTYLIMIT_THROTTLE_PCT_MAX 99 =20 struct { VcpuStat stat; @@ -25,6 +45,36 @@ struct { QemuThread thread; } *vcpu_dirty_rate_stat; =20 +typedef struct VcpuDirtyLimitState { + int cpu_index; + bool enabled; + /* + * Quota dirty page rate, unit is MB/s + * zero if not enabled. + */ + uint64_t quota; + /* + * How many times that the current dirty page + * rate unmatch the quota dirty page rate. 
+ */ + int unmatched_cnt; +} VcpuDirtyLimitState; + +struct { + VcpuDirtyLimitState *states; + /* Max cpus number configured by user */ + int max_cpus; + /* Number of vcpu under dirtylimit */ + int limited_nvcpu; +} *dirtylimit_state; + +/* protect dirtylimit_state */ +static QemuMutex dirtylimit_mutex; +static QemuThread dirtylimit_thr; + +/* dirtylimit thread quit if dirtylimit_quit is true */ +static bool dirtylimit_quit; + static void vcpu_dirty_rate_stat_collect(void) { int64_t start_time; @@ -118,3 +168,310 @@ void vcpu_dirty_rate_stat_finalize(void) free(vcpu_dirty_rate_stat); vcpu_dirty_rate_stat =3D NULL; } + +static void dirtylimit_state_lock(void) +{ + qemu_mutex_lock(&dirtylimit_mutex); +} + +static void dirtylimit_state_unlock(void) +{ + qemu_mutex_unlock(&dirtylimit_mutex); +} + +static void +__attribute__((__constructor__)) dirtylimit_mutex_init(void) +{ + qemu_mutex_init(&dirtylimit_mutex); +} + +static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) +{ + return &dirtylimit_state->states[cpu_index]; +} + +void dirtylimit_state_initialize(void) +{ + MachineState *ms =3D MACHINE(qdev_get_machine()); + int max_cpus =3D ms->smp.max_cpus; + int i; + + dirtylimit_state =3D g_malloc0(sizeof(*dirtylimit_state)); + + dirtylimit_state->states =3D + g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus); + + for (i =3D 0; i < max_cpus; i++) { + dirtylimit_state->states[i].cpu_index =3D i; + } + + dirtylimit_state->max_cpus =3D max_cpus; + trace_dirtylimit_state_initialize(max_cpus); +} + +void dirtylimit_state_finalize(void) +{ + free(dirtylimit_state->states); + dirtylimit_state->states =3D NULL; + + free(dirtylimit_state); + dirtylimit_state =3D NULL; + + trace_dirtylimit_state_finalize(); +} + +bool dirtylimit_in_service(void) +{ + return !!dirtylimit_state; +} + +bool dirtylimit_vcpu_index_valid(int cpu_index) +{ + MachineState *ms =3D MACHINE(qdev_get_machine()); + + return !(cpu_index < 0 || + cpu_index >=3D ms->smp.max_cpus); +} + 
+static inline void dirtylimit_vcpu_set_quota(int cpu_index, + uint64_t quota, + bool on) +{ + dirtylimit_state->states[cpu_index].quota =3D quota; + if (on) { + if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { + dirtylimit_state->limited_nvcpu++; + } + } else { + if (dirtylimit_state->states[cpu_index].enabled) { + dirtylimit_state->limited_nvcpu--; + } + } + + dirtylimit_state->states[cpu_index].enabled =3D on; +} + +static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) +{ + static uint64_t max_dirtyrate; + uint32_t dirty_ring_size =3D kvm_dirty_ring_size(); + uint64_t dirty_ring_size_meory_MB =3D + dirty_ring_size * TARGET_PAGE_SIZE >> 20; + + if (max_dirtyrate < dirtyrate) { + max_dirtyrate =3D dirtyrate; + } + + return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; +} + +static inline bool dirtylimit_done(uint64_t quota, + uint64_t current) +{ + uint64_t min, max; + + min =3D MIN(quota, current); + max =3D MAX(quota, current); + + return ((max - min) <=3D DIRTYLIMIT_TOLERANCE_RANGE) ? 
true : false; +} + +static inline bool +dirtylimit_need_linear_adjustment(uint64_t quota, + uint64_t current) +{ + uint64_t min, max, pct; + + min =3D MIN(quota, current); + max =3D MAX(quota, current); + + pct =3D (max - min) * 100 / max; + + return pct > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; +} + +static void dirtylimit_set_throttle(CPUState *cpu, + uint64_t quota, + uint64_t current) +{ + int64_t ring_full_time_us =3D 0; + uint64_t sleep_pct =3D 0; + uint64_t throttle_us =3D 0; + + ring_full_time_us =3D dirtylimit_dirty_ring_full_time(current); + + if (dirtylimit_need_linear_adjustment(quota, current)) { + if (quota < current) { + sleep_pct =3D (current - quota) * 100 / current; + throttle_us =3D + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full +=3D throttle_us; + } else { + sleep_pct =3D (quota - current) * 100 / quota; + throttle_us =3D + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full -=3D throttle_us; + } + + trace_dirtylimit_throttle_pct(cpu->cpu_index, + sleep_pct, + throttle_us); + } else { + if (quota < current) { + cpu->throttle_us_per_full +=3D ring_full_time_us / 10; + } else { + cpu->throttle_us_per_full -=3D ring_full_time_us / 10; + } + } + + cpu->throttle_us_per_full =3D MIN(cpu->throttle_us_per_full, + ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); + + cpu->throttle_us_per_full =3D MAX(cpu->throttle_us_per_full, 0); +} + +static void dirtylimit_adjust_throttle(CPUState *cpu) +{ + uint64_t quota =3D 0; + uint64_t current =3D 0; + int cpu_index =3D cpu->cpu_index; + + quota =3D dirtylimit_vcpu_get_state(cpu_index)->quota; + current =3D vcpu_dirty_rate_get(cpu_index); + + if (current =3D=3D 0 && + dirtylimit_vcpu_get_state(cpu_index)->unmatched_cnt =3D=3D 0) { + cpu->throttle_us_per_full =3D 0; + goto end; + } else if (++dirtylimit_vcpu_get_state(cpu_index)->unmatched_cnt + < 2) { + goto end; + } else if (dirtylimit_done(quota, current)) { + goto end; + } else { + 
dirtylimit_vcpu_get_state(cpu_index)->unmatched_cnt =3D 0; + dirtylimit_set_throttle(cpu, quota, current); + } +end: + trace_dirtylimit_adjust_throttle(cpu_index, + quota, current, + cpu->throttle_us_per_full); + return; +} + +static void *dirtylimit_thread(void *opaque) +{ + CPUState *cpu; + + rcu_register_thread(); + + while (!qatomic_read(&dirtylimit_quit)) { + sleep(DIRTYLIMIT_CALC_TIME_MS / 1000); + + dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + dirtylimit_state_unlock(); + break; + } + + CPU_FOREACH(cpu) { + if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + continue; + } + dirtylimit_adjust_throttle(cpu); + } + dirtylimit_state_unlock(); + } + + rcu_unregister_thread(); + + return NULL; +} + +static void dirtylimit_thread_start(void) +{ + qatomic_set(&dirtylimit_quit, 0); + qemu_thread_create(&dirtylimit_thr, + "dirtylimit", + dirtylimit_thread, + NULL, + QEMU_THREAD_JOINABLE); +} + +static void dirtylimit_thread_stop(void) +{ + qatomic_set(&dirtylimit_quit, 1); + qemu_mutex_unlock_iothread(); + qemu_thread_join(&dirtylimit_thr); + qemu_mutex_lock_iothread(); +} + +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable) +{ + trace_dirtylimit_set_vcpu(cpu_index, quota); + + if (enable) { + if (dirtylimit_in_service()) { + /* only set the vcpu dirty page rate limit */ + dirtylimit_vcpu_set_quota(cpu_index, quota, true); + return; + } + + /* initialize state when set dirtylimit first time */ + dirtylimit_state_lock(); + dirtylimit_state_initialize(); + dirtylimit_vcpu_set_quota(cpu_index, quota, true); + dirtylimit_state_unlock(); + + dirtylimit_thread_start(); + } else { + if (!dirtylimit_in_service()) { + return; + } + + dirtylimit_state_lock(); + /* dirty page rate limit is not enabled */ + if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { + dirtylimit_state_unlock(); + return; + } + + /* switch off vcpu dirty page rate limit */ + dirtylimit_vcpu_set_quota(cpu_index, 0, false); + dirtylimit_state_unlock(); + + 
if (!dirtylimit_state->limited_nvcpu) { + dirtylimit_thread_stop(); + + dirtylimit_state_lock(); + dirtylimit_state_finalize(); + dirtylimit_state_unlock(); + } + } +} + +void dirtylimit_set_all(uint64_t quota, + bool enable) +{ + MachineState *ms =3D MACHINE(qdev_get_machine()); + int max_cpus =3D ms->smp.max_cpus; + int i; + + for (i =3D 0; i < max_cpus; i++) { + dirtylimit_set_vcpu(i, quota, enable); + } +} + +void dirtylimit_vcpu_execute(CPUState *cpu) +{ + if (dirtylimit_in_service() && + dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && + cpu->throttle_us_per_full) { + trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + usleep(cpu->throttle_us_per_full); + } +} diff --git a/softmmu/trace-events b/softmmu/trace-events index 9c88887..ff441ac 100644 --- a/softmmu/trace-events +++ b/softmmu/trace-events @@ -31,3 +31,11 @@ runstate_set(int current_state, const char *current_stat= e_str, int new_state, co system_wakeup_request(int reason) "reason=3D%d" qemu_system_shutdown_request(int reason) "reason=3D%d" qemu_system_powerdown_request(void) "" + +#dirtylimit.c +dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: ma= x cpus %d" +dirtylimit_state_finalize(void) +dirtylimit_adjust_throttle(int cpu_index, uint64_t quota, uint64_t current= , int64_t time_us) "CPU[%d] throttle: quota %" PRIu64 ", current %" PRIu64 = ", throttle %"PRIi64 " us" +dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU= [%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" +dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page= rate limit %"PRIu64 +dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sle= ep %"PRIi64 " us" --=20 1.8.3.1 From nobody Mon May 6 21:27:55 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) 
smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1641316808791801.4551113055185; Tue, 4 Jan 2022 09:20:08 -0800 (PST) Received: from localhost ([::1]:42476 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nTj-0004AM-IJ for importer@patchew.org; Tue, 04 Jan 2022 12:20:07 -0500 Received: from eggs.gnu.org ([209.51.188.92]:55510) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1n4nOT-0004CY-AL for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:41 -0500 Received: from prt-mail.chinatelecom.cn ([42.123.76.226]:48564 helo=chinatelecom.cn) by eggs.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1n4nOP-00009b-KB for qemu-devel@nongnu.org; Tue, 04 Jan 2022 12:14:40 -0500 Received: from clientip-125.69.40.99 (unknown [172.18.0.218]) by chinatelecom.cn (HERMES) with SMTP id 50D03280029; Wed, 5 Jan 2022 01:14:33 +0800 (CST) Received: from ([172.18.0.218]) by app0025 with ESMTP id 32b1d01a0447457bb4731a2a837a3495 for qemu-devel@nongnu.org; Wed, 05 Jan 2022 01:14:36 CST HMM_SOURCE_IP: 172.18.0.218:33794.643395762 HMM_ATTACHE_NUM: 0000 HMM_SOURCE_TYPE: SMTP X-189-SAVE-TO-SEND: +huangy81@chinatelecom.cn X-Transaction-ID: 32b1d01a0447457bb4731a2a837a3495 X-Real-From: huangy81@chinatelecom.cn X-Receive-IP: 172.18.0.218 X-MEDUSA-Status: 0 From: huangy81@chinatelecom.cn To: qemu-devel Subject: [PATCH v11 4/4] softmmu/dirtylimit: implement dirty page rate limit Date: Wed, 5 Jan 2022 01:14:09 +0800 Message-Id: <3c7304f10b44919bb328a62c2fae988e2abf2a6a.1641316375.git.huangy81@chinatelecom.cn> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted 
sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=42.123.76.226; envelope-from=huangy81@chinatelecom.cn; helo=chinatelecom.cn X-Spam_score_int: 0 X-Spam_score: -0.0 X-Spam_bar: / X-Spam_report: (-0.0 / 5.0 requ) SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=unavailable autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eduardo Habkost , David Hildenbrand , Hyman , Juan Quintela , Richard Henderson , Markus ArmBruster , Peter Xu , "Dr. David Alan Gilbert" , Paolo Bonzini , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZM-MESSAGEID: 1641316811132100001 From: Hyman Huang(=E9=BB=84=E5=8B=87) Implement dirtyrate calculation periodically based on dirty-ring and throttle virtual CPU until it reaches the quota dirty page rate given by user. Introduce qmp commands "set-vcpu-dirty-limit", "cancel-vcpu-dirty-limit", "query-vcpu-dirty-limit" to enable, disable, query dirty page limit for virtual CPU. Meanwhile, introduce corresponding hmp commands "set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit", "info vcpu_dirty_limit" so the feature can be more usable. Signed-off-by: Hyman Huang(=E9=BB=84=E5=8B=87) --- hmp-commands-info.hx | 13 ++++ hmp-commands.hx | 32 ++++++++++ include/monitor/hmp.h | 3 + qapi/migration.json | 60 ++++++++++++++++++ softmmu/dirtylimit.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++= ++++ 5 files changed, 275 insertions(+) diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 407a1da..5dd3001 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -863,6 +863,19 @@ SRST Display the vcpu dirty rate information.
ERST =20 + { + .name =3D "vcpu_dirty_limit", + .args_type =3D "", + .params =3D "", + .help =3D "show dirty page limit information of all vCPU", + .cmd =3D hmp_info_vcpu_dirty_limit, + }, + +SRST + ``info vcpu_dirty_limit`` + Display the vcpu dirty page limit information. +ERST + #if defined(TARGET_I386) { .name =3D "sgx", diff --git a/hmp-commands.hx b/hmp-commands.hx index 70a9136..416982c 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1744,3 +1744,35 @@ ERST "\n\t\t\t -b to specify dirty bitmap as method of ca= lculation)", .cmd =3D hmp_calc_dirty_rate, }, + +SRST +``set_vcpu_dirty_limit`` + Set dirty page rate limit on virtual CPU, the information about all the + virtual CPU dirty limit status can be observed with ``info vcpu_dirty_li= mit`` + command. +ERST + + { + .name =3D "set_vcpu_dirty_limit", + .args_type =3D "dirty_rate:l,cpu_index:l?", + .params =3D "dirty_rate [cpu_index]", + .help =3D "set dirty page rate limit, use cpu_index to set l= imit on a " + "\n\t\t specified virtual cpu", + .cmd =3D hmp_set_vcpu_dirty_limit, + }, + +SRST +``cancel_vcpu_dirty_limit`` + Cancel dirty page rate limit on virtual CPU, the information about all t= he + virtual CPU dirty limit status can be observed with ``info vcpu_dirty_li= mit`` + command. 
+ERST + + { + .name =3D "cancel_vcpu_dirty_limit", + .args_type =3D "cpu_index:l?", + .params =3D "[cpu_index]", + .help =3D "cancel dirty page rate limit, use cpu_index to ca= ncel limit " + "\n\t\t on a specified virtual cpu", + .cmd =3D hmp_cancel_vcpu_dirty_limit, + }, diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index 96d0148..478820e 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -131,6 +131,9 @@ void hmp_replay_delete_break(Monitor *mon, const QDict = *qdict); void hmp_replay_seek(Monitor *mon, const QDict *qdict); void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict); void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); +void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); +void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict); void hmp_human_readable_text_helper(Monitor *mon, HumanReadableText *(*qmp_handler)(Erro= r **)); =20 diff --git a/qapi/migration.json b/qapi/migration.json index ac5fa56..9d406f4 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1869,6 +1869,66 @@ 'current-rate': 'int64' } } =20 ## +# @set-vcpu-dirty-limit: +# +# Set the upper limit of dirty page rate for virtual CPU. +# +# Requires KVM with accelerator property "dirty-ring-size" set. +# A virtual CPU's dirty page rate is a measure of its memory load. +# To observe dirty page rates, use @calc-dirty-rate. +# +# @cpu-index: index of virtual CPU, default is all. +# +# @dirty-rate: upper limit of dirty page rate for virtual CPU. +# +# Since: 7.0 +# +# Example: +# {"execute": "set-vcpu-dirty-limit", +# "arguments": { "dirty-rate": 200, +# "cpu-index": 1 } } +# +## +{ 'command': 'set-vcpu-dirty-limit', + 'data': { '*cpu-index': 'uint64', + 'dirty-rate': 'uint64' } } + +## +# @cancel-vcpu-dirty-limit: +# +# Cancel the upper limit of dirty page rate for virtual CPU.
+# +# Cancel the dirty page limit for the vCPU which has been set with +# set-vcpu-dirty-limit command. Note that this command requires +# support from dirty ring, same as the "set-vcpu-dirty-limit". +# +# @cpu-index: index of virtual CPU, default is all. +# +# Since: 7.0 +# +# Example: +# {"execute": "cancel-vcpu-dirty-limit", +# "arguments": { "cpu-index": 1 } } +# +## +{ 'command': 'cancel-vcpu-dirty-limit', + 'data': { '*cpu-index': 'uint64'} } + +## +# @query-vcpu-dirty-limit: +# +# Returns information about all virtual CPU dirty limit if enabled. +# +# Since: 7.0 +# +# Example: +# {"execute": "query-vcpu-dirty-limit"} +# +## +{ 'command': 'query-vcpu-dirty-limit', + 'returns': [ 'DirtyLimitInfo' ] } + +## # @snapshot-save: # # Save a VM snapshot diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c index c9f5745..071f3b9 100644 --- a/softmmu/dirtylimit.c +++ b/softmmu/dirtylimit.c @@ -14,8 +14,12 @@ #include "qapi/error.h" #include "qemu/main-loop.h" #include "qapi/qapi-commands-migration.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" #include "sysemu/dirtyrate.h" #include "sysemu/dirtylimit.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" #include "exec/memory.h" #include "hw/boards.h" #include "sysemu/kvm.h" @@ -475,3 +479,166 @@ void dirtylimit_vcpu_execute(CPUState *cpu) usleep(cpu->throttle_us_per_full); } } + +void qmp_set_vcpu_dirty_limit(bool has_cpu_index, + uint64_t cpu_index, + uint64_t dirty_rate, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + error_setg(errp, "dirty page limit feature requires KVM with" + " accelerator property 'dirty-ring-size' set'"); + return; + } + + if (!dirtylimit_in_service()) { + vcpu_dirty_rate_stat_initialize(); + vcpu_dirty_rate_stat_start(); + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (has_cpu_index) { + dirtylimit_set_vcpu(cpu_index, dirty_rate, true); + } else { +
dirtylimit_set_all(dirty_rate, true); + } +} + +void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, + uint64_t cpu_index, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + return; + } + + if (!dirtylimit_in_service()) { + return; + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (has_cpu_index) { + dirtylimit_set_vcpu(cpu_index, 0, false); + } else { + dirtylimit_set_all(0, false); + } + + if (!dirtylimit_in_service()) { + vcpu_dirty_rate_stat_stop(); + vcpu_dirty_rate_stat_finalize(); + } +} + +void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + int64_t dirty_rate =3D qdict_get_int(qdict, "dirty_rate"); + int64_t cpu_index =3D qdict_get_try_int(qdict, "cpu_index", -1); + Error *err =3D NULL; + + qmp_set_vcpu_dirty_limit(!!(cpu_index !=3D -1), cpu_index, dirty_rate,= &err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " + "dirty limit for virtual CPU]\n"); +} + +void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + int64_t cpu_index =3D qdict_get_try_int(qdict, "cpu_index", -1); + Error *err =3D NULL; + + qmp_cancel_vcpu_dirty_limit(!!(cpu_index !=3D -1), cpu_index, &err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " + "dirty limit for virtual CPU]\n"); +} + +static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) +{ + DirtyLimitInfo *info =3D NULL; + + info =3D g_malloc0(sizeof(*info)); + info->cpu_index =3D cpu_index; + info->limit_rate =3D dirtylimit_vcpu_get_state(cpu_index)->quota; + info->current_rate =3D vcpu_dirty_rate_get(cpu_index); + + return info; +} + +static struct DirtyLimitInfoList *dirtylimit_query_all(void) +{ + int i, index; + DirtyLimitInfo *info =3D NULL; + DirtyLimitInfoList *head =3D NULL, **tail =3D &head; + + 
dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + return NULL; + } + + for (i =3D 0; i < dirtylimit_state->max_cpus; i++) { + index =3D dirtylimit_state->states[i].cpu_index; + if (dirtylimit_vcpu_get_state(index)->enabled) { + info =3D dirtylimit_query_vcpu(index); + QAPI_LIST_APPEND(tail, info); + } + } + + dirtylimit_state_unlock(); + + return head; +} + +struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) +{ + if (!dirtylimit_in_service()) { + error_setg(errp, "dirty page limit not enabled"); + return NULL; + } + + return dirtylimit_query_all(); +} + +void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + DirtyLimitInfoList *limit, *head, *info =3D NULL; + Error *err =3D NULL; + + if (!dirtylimit_in_service()) { + monitor_printf(mon, "Dirty page limit not enabled!\n"); + return; + } + + info =3D qmp_query_vcpu_dirty_limit(&err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + head =3D info; + for (limit =3D head; limit !=3D NULL; limit =3D limit->next) { + monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)= ," + " current rate %"PRIi64 " (MB/s)\n", + limit->value->cpu_index, + limit->value->limit_rate, + limit->value->current_rate); + } + + g_free(info); +} --=20 1.8.3.1