From: Hyman Huang <yong.huang@smartx.com>
KVM reports every page as dirty (all bits set to 1) the first time
userspace retrieves the dirty bitmap when KVM_DIRTY_LOG_INITIALLY_SET
is enabled; in such a scenario, the RAMBlock dirty sync of the initial
iteration can be skipped.
Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 migration/cpu-throttle.c |  3 ++-
 migration/ram.c          | 30 +++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
index 342681cdd4..06e3b1be78 100644
--- a/migration/cpu-throttle.c
+++ b/migration/cpu-throttle.c
@@ -27,6 +27,7 @@
 #include "hw/core/cpu.h"
 #include "qemu/main-loop.h"
 #include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
 #include "cpu-throttle.h"
 #include "migration.h"
 #include "migration-stats.h"
@@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
      * effect on guest performance, therefore omit it to avoid
      * paying extra for the sync penalty.
      */
-    if (sync_cnt <= 1) {
+    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
         goto end;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index d284f63854..b312ebd69d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
 {
     MigrationState *ms = migrate_get_current();
     RAMBlock *block;
-    unsigned long pages;
+    unsigned long pages, clear_bmap_pages;
     uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
@@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
 
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            clear_bmap_pages = clear_bmap_size(pages, shift);
             /*
              * The initial dirty bitmap for migration must be set with all
              * ones to make sure we'll migrate every guest RAM page to
@@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
                 block->file_bmap = bitmap_new(pages);
             }
             block->clear_bmap_shift = shift;
-            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
+            block->clear_bmap = bitmap_new(clear_bmap_pages);
+            /*
+             * Set the clear bitmap by default to enable dirty logging.
+             *
+             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
+             * will be enabled gradually in small chunks using
+             * KVM_CLEAR_DIRTY_LOG
+             */
+            if (kvm_dirty_log_manual_enabled()) {
+                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
+            }
         }
     }
 }
@@ -2771,6 +2782,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
+    Error *local_err = NULL;
     bool ret = true;
 
     qemu_mutex_lock_ramlist();
@@ -2783,7 +2795,19 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
             if (!ret) {
                 goto out_unlock;
             }
-            migration_bitmap_sync_precopy(false);
+
+            if (kvm_dirty_log_manual_enabled()) {
+                /*
+                 * Bypass the RAMBlock dirty sync and still publish a
+                 * notification.
+                 */
+                if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
+                                   &local_err)) {
+                    error_report_err(local_err);
+                }
+            } else {
+                migration_bitmap_sync_precopy(false);
+            }
         }
     }
 out_unlock:
--
2.27.0
On Wed, Oct 23, 2024 at 10:09:51AM +0800, yong.huang@smartx.com wrote:

[...]

>              block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set the clear bitmap by default to enable dirty logging.
> +             *
> +             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
> +             * will be enabled gradually in small chunks using
> +             * KVM_CLEAR_DIRTY_LOG
> +             */
> +            if (kvm_dirty_log_manual_enabled()) {
> +                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> +            }

Why does it need to be relevant to whether DIRTY_LOG is enabled?

I wonder if we should always set clear_bmap to all 1s unconditionally, as
we always set bmap to all 1s by default.

Then we always skip the sync during setup, dropping patch 1.

-- 
Peter Xu
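For reference, a minimal sketch of what that suggestion could look like in
ram_list_init_bitmaps(), reusing the clear_bmap_pages variable from the patch
above (an illustration only, not the final code):

            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_pages);
            /*
             * Mirror the bmap default: mark every chunk as still needing a
             * KVM_CLEAR_DIRTY_LOG, regardless of whether
             * KVM_DIRTY_LOG_INITIALLY_SET is in use.  Chunks are then
             * cleared (and protection armed) lazily as pages get sent.
             */
            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);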
On Wed, Oct 30, 2024 at 12:21 AM Peter Xu <peterx@redhat.com> wrote:

> Why does it need to be relevant to whether DIRTY_LOG is enabled?
>
> I wonder if we should always set clear_bmap to all 1s unconditionally, as
> we always set bmap to all 1s by default.

OK, this works. We can drop it.

> Then we always skip the sync during setup, dropping patch 1.

IIUC, KVM initializes the slot->dirty_bitmap with all 1s only when
KVM_DIRTY_LOG_INITIALLY_SET is enabled, and with all 0s otherwise.

This means that if KVM does not support the KVM_DIRTY_LOG_INITIALLY_SET
feature, userspace should do the first sync so that KVM can set the WP bit
and clear the D-bit of the PTEs.

Could skipping the first sync still handle this scenario?

-- 
Best regards
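For context, the kernel-side initialization being referred to looks roughly
like the following, paraphrased from the memslot preparation path in
virt/kvm/kvm_main.c (not a verbatim quote; details vary across kernel
versions):

        /* A newly allocated dirty bitmap starts out all zeroes... */
        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES && !new->dirty_bitmap) {
                r = kvm_alloc_dirty_bitmap(new);
                if (r)
                        return r;

                /* ...unless userspace enabled KVM_DIRTY_LOG_INITIALLY_SET */
                if (kvm_dirty_log_manual_protect_and_init_set(kvm))
                        bitmap_set(new->dirty_bitmap, 0, new->npages);
        }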
On Wed, Oct 30, 2024 at 10:09:38AM +0800, Yong Huang wrote:

[...]

> IIUC, KVM initializes the slot->dirty_bitmap with all 1s only when
> KVM_DIRTY_LOG_INITIALLY_SET is enabled, and with all 0s otherwise.
>
> This means that if KVM does not support the KVM_DIRTY_LOG_INITIALLY_SET
> feature, userspace should do the first sync so that KVM can set the WP bit
> and clear the D-bit of the PTEs.
>
> Could skipping the first sync still handle this scenario?

Yes, the old kernels could be tricky (!CLEAR_LOG support), but I hope it's
also working.

The thing is, log_start() should also protect all pages if that's the case.
For x86, that should correspond to:

kvm_mmu_slot_apply_flags():

        /*
         * Initially-all-set does not require write protecting any page,
         * because they're all assumed to be dirty.
         */
        if (kvm_dirty_log_manual_protect_and_init_set(kvm))
                return;

        if (READ_ONCE(eager_page_split))
                kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);

        if (kvm_x86_ops.cpu_dirty_log_size) {
                kvm_mmu_slot_leaf_clear_dirty(kvm, new);
                kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
        } else {
                kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
        }

In general, I think even if GET_DIRTY_LOG in setup() is omitted, all pages
should still be write-protected right after log_start().  Then follow-up
log_clear()s will be no-ops, until the next sync, which will reprotect
every page again.

Please double check.

Thanks,

-- 
Peter Xu
On Thu, Oct 31, 2024 at 3:43 AM Peter Xu <peterx@redhat.com> wrote:

[...]

> The thing is, log_start() should also protect all pages if that's the case.
> For x86, that should correspond to:
>
> kvm_mmu_slot_apply_flags():
>
>         /*
>          * Initially-all-set does not require write protecting any page,
>          * because they're all assumed to be dirty.
>          */
>         if (kvm_dirty_log_manual_protect_and_init_set(kvm))
>                 return;
>
> [...]

This path does the write protecting indeed. Thanks for pointing this out.

> In general, I think even if GET_DIRTY_LOG in setup() is omitted, all pages
> should still be write-protected right after log_start().  Then follow-up
> log_clear()s will be no-ops, until the next sync, which will reprotect
> every page again.

Agreed. I'll drop patch 1 and simplify patch 2 in the next version.
Thanks for the comments.

> Please double check.

Yong

-- 
Best regards
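For completeness, one possible shape of the simplified setup path agreed on
above (a sketch only, assuming clear_bmap is now set unconditionally in
ram_list_init_bitmaps(); the actual next version may differ):

        /* We don't use dirty log with background snapshots */
        if (!migrate_background_snapshot()) {
            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
            if (!ret) {
                goto out_unlock;
            }
            /*
             * Skip the initial RAMBlock dirty sync: bmap and clear_bmap
             * already start as all 1s, and log_start() has write-protected
             * the pages (or, with KVM_DIRTY_LOG_INITIALLY_SET, deferred
             * protection to later KVM_CLEAR_DIRTY_LOG calls).  Still
             * publish the notification so listeners such as virtio-balloon
             * free page hinting keep working.
             */
            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
                error_report_err(local_err);
            }
        }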