[PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration

yong.huang@smartx.com posted 2 patches 1 month ago
[PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration
Posted by yong.huang@smartx.com 1 month ago
From: Hyman Huang <yong.huang@smartx.com>

KVM always returns 1 when userspace retrieves a dirty bitmap for
the first time when KVM_DIRTY_LOG_INITIALLY_SET is enabled; in such
scenario, the RAMBlock dirty sync of the initial iteration can be
skipped.

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 migration/cpu-throttle.c |  3 ++-
 migration/ram.c          | 30 +++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
index 342681cdd4..06e3b1be78 100644
--- a/migration/cpu-throttle.c
+++ b/migration/cpu-throttle.c
@@ -27,6 +27,7 @@
 #include "hw/core/cpu.h"
 #include "qemu/main-loop.h"
 #include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
 #include "cpu-throttle.h"
 #include "migration.h"
 #include "migration-stats.h"
@@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
      * effect on guest performance, therefore omit it to avoid
      * paying extra for the sync penalty.
      */
-    if (sync_cnt <= 1) {
+    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
         goto end;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index d284f63854..b312ebd69d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
 {
     MigrationState *ms = migrate_get_current();
     RAMBlock *block;
-    unsigned long pages;
+    unsigned long pages, clear_bmap_pages;
     uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
@@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
 
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            clear_bmap_pages = clear_bmap_size(pages, shift);
             /*
              * The initial dirty bitmap for migration must be set with all
              * ones to make sure we'll migrate every guest RAM page to
@@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
                 block->file_bmap = bitmap_new(pages);
             }
             block->clear_bmap_shift = shift;
-            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
+            block->clear_bmap = bitmap_new(clear_bmap_pages);
+            /*
+             * Set the clear bitmap by default to enable dirty logging.
+             *
+             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
+             * will be enabled gradually in small chunks using
+             * KVM_CLEAR_DIRTY_LOG
+             */
+            if (kvm_dirty_log_manual_enabled()) {
+                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
+            }
         }
     }
 }
@@ -2771,6 +2782,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
+    Error *local_err = NULL;
     bool ret = true;
 
     qemu_mutex_lock_ramlist();
@@ -2783,7 +2795,19 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
             if (!ret) {
                 goto out_unlock;
             }
-            migration_bitmap_sync_precopy(false);
+
+            if (kvm_dirty_log_manual_enabled()) {
+                /*
+                 * Bypass the RAMBlock dirty sync and still publish a
+                 * notification.
+                 */
+                if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
+                            &local_err)) {
+                    error_report_err(local_err);
+                }
+            } else {
+                migration_bitmap_sync_precopy(false);
+            }
         }
     }
 out_unlock:
-- 
2.27.0
Re: [PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration
Posted by Peter Xu 3 weeks, 4 days ago
On Wed, Oct 23, 2024 at 10:09:51AM +0800, yong.huang@smartx.com wrote:
> From: Hyman Huang <yong.huang@smartx.com>
> 
> KVM always returns 1 when userspace retrieves a dirty bitmap for
> the first time when KVM_DIRTY_LOG_INITIALLY_SET is enabled; in such
> scenario, the RAMBlock dirty sync of the initial iteration can be
> skipped.
> 
> Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> ---
>  migration/cpu-throttle.c |  3 ++-
>  migration/ram.c          | 30 +++++++++++++++++++++++++++---
>  2 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> index 342681cdd4..06e3b1be78 100644
> --- a/migration/cpu-throttle.c
> +++ b/migration/cpu-throttle.c
> @@ -27,6 +27,7 @@
>  #include "hw/core/cpu.h"
>  #include "qemu/main-loop.h"
>  #include "sysemu/cpus.h"
> +#include "sysemu/kvm.h"
>  #include "cpu-throttle.h"
>  #include "migration.h"
>  #include "migration-stats.h"
> @@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
>       * effect on guest performance, therefore omit it to avoid
>       * paying extra for the sync penalty.
>       */
> -    if (sync_cnt <= 1) {
> +    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
>          goto end;
>      }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index d284f63854..b312ebd69d 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>  {
>      MigrationState *ms = migrate_get_current();
>      RAMBlock *block;
> -    unsigned long pages;
> +    unsigned long pages, clear_bmap_pages;
>      uint8_t shift;
>  
>      /* Skip setting bitmap if there is no RAM */
> @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>  
>          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
> +            clear_bmap_pages = clear_bmap_size(pages, shift);
>              /*
>               * The initial dirty bitmap for migration must be set with all
>               * ones to make sure we'll migrate every guest RAM page to
> @@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
>                  block->file_bmap = bitmap_new(pages);
>              }
>              block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set the clear bitmap by default to enable dirty logging.
> +             *
> +             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
> +             * will be enabled gradually in small chunks using
> +             * KVM_CLEAR_DIRTY_LOG
> +             */
> +            if (kvm_dirty_log_manual_enabled()) {
> +                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> +            }

Why it needs to be relevant to whether DIRTY_LOG is enabled?

I wonder if we should always set clear_bmap to 1 unconditionally, as we
always set bmap to all 1s by default.

Then we skip sync always during setup, dropping patch 1.

>          }
>      }
>  }
> @@ -2771,6 +2782,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>  
>  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>  {
> +    Error *local_err = NULL;
>      bool ret = true;
>  
>      qemu_mutex_lock_ramlist();
> @@ -2783,7 +2795,19 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>              if (!ret) {
>                  goto out_unlock;
>              }
> -            migration_bitmap_sync_precopy(false);
> +
> +            if (kvm_dirty_log_manual_enabled()) {
> +                /*
> +                 * Bypass the RAMBlock dirty sync and still publish a
> +                 * notification.
> +                 */
> +                if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
> +                            &local_err)) {
> +                    error_report_err(local_err);
> +                }
> +            } else {
> +                migration_bitmap_sync_precopy(false);
> +            }
>          }
>      }
>  out_unlock:
> -- 
> 2.27.0
> 

-- 
Peter Xu
Re: [PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration
Posted by Yong Huang 3 weeks, 3 days ago
On Wed, Oct 30, 2024 at 12:21 AM Peter Xu <peterx@redhat.com> wrote:

> On Wed, Oct 23, 2024 at 10:09:51AM +0800, yong.huang@smartx.com wrote:
> > From: Hyman Huang <yong.huang@smartx.com>
> >
> > KVM always returns 1 when userspace retrieves a dirty bitmap for
> > the first time when KVM_DIRTY_LOG_INITIALLY_SET is enabled; in such
> > scenario, the RAMBlock dirty sync of the initial iteration can be
> > skipped.
> >
> > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > ---
> >  migration/cpu-throttle.c |  3 ++-
> >  migration/ram.c          | 30 +++++++++++++++++++++++++++---
> >  2 files changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > index 342681cdd4..06e3b1be78 100644
> > --- a/migration/cpu-throttle.c
> > +++ b/migration/cpu-throttle.c
> > @@ -27,6 +27,7 @@
> >  #include "hw/core/cpu.h"
> >  #include "qemu/main-loop.h"
> >  #include "sysemu/cpus.h"
> > +#include "sysemu/kvm.h"
> >  #include "cpu-throttle.h"
> >  #include "migration.h"
> >  #include "migration-stats.h"
> > @@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> >       * effect on guest performance, therefore omit it to avoid
> >       * paying extra for the sync penalty.
> >       */
> > -    if (sync_cnt <= 1) {
> > +    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
> >          goto end;
> >      }
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index d284f63854..b312ebd69d 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >  {
> >      MigrationState *ms = migrate_get_current();
> >      RAMBlock *block;
> > -    unsigned long pages;
> > +    unsigned long pages, clear_bmap_pages;
> >      uint8_t shift;
> >
> >      /* Skip setting bitmap if there is no RAM */
> > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >
> >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >              pages = block->max_length >> TARGET_PAGE_BITS;
> > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >              /*
> >               * The initial dirty bitmap for migration must be set with
> all
> >               * ones to make sure we'll migrate every guest RAM page to
> > @@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
> >                  block->file_bmap = bitmap_new(pages);
> >              }
> >              block->clear_bmap_shift = shift;
> > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> shift));
> > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > +            /*
> > +             * Set the clear bitmap by default to enable dirty logging.
> > +             *
> > +             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
> > +             * will be enabled gradually in small chunks using
> > +             * KVM_CLEAR_DIRTY_LOG
> > +             */
> > +            if (kvm_dirty_log_manual_enabled()) {
> > +                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > +            }
>
> Why it needs to be relevant to whether DIRTY_LOG is enabled?
>
> I wonder if we should always set clear_bmap to 1 unconditionally, as we
> always set bmap to all 1s by default.
>

OK, this works. We can drop it.


>
> Then we skip sync always during setup, dropping patch 1.
>

IIUC, KVM initializes the slot->dirty_bitmap with 1 only when
KVM_DIRTY_LOG_INITIALLY_SET is enabled, 0 otherwize.
This means that if KVM does not support the
KVM_DIRTY_LOG_INITIALLY_SET feature, userspace should
do the first sync so that KVM could set the WP bit and clear
the D-bit of the PTE.

Skipping first sync could handle this scenario?


> >          }
> >      }
> >  }
> > @@ -2771,6 +2782,7 @@ static void
> migration_bitmap_clear_discarded_pages(RAMState *rs)
> >
> >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> >  {
> > +    Error *local_err = NULL;
> >      bool ret = true;
> >
> >      qemu_mutex_lock_ramlist();
> > @@ -2783,7 +2795,19 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> **errp)
> >              if (!ret) {
> >                  goto out_unlock;
> >              }
> > -            migration_bitmap_sync_precopy(false);
> > +
> > +            if (kvm_dirty_log_manual_enabled()) {
> > +                /*
> > +                 * Bypass the RAMBlock dirty sync and still publish a
> > +                 * notification.
> > +                 */
> > +                if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
> > +                            &local_err)) {
> > +                    error_report_err(local_err);
> > +                }
> > +            } else {
> > +                migration_bitmap_sync_precopy(false);
> > +            }
> >          }
> >      }
> >  out_unlock:
> > --
> > 2.27.0
> >
>
> --
> Peter Xu
>
>

-- 
Best regards
Re: [PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration
Posted by Peter Xu 3 weeks, 3 days ago
On Wed, Oct 30, 2024 at 10:09:38AM +0800, Yong Huang wrote:
> On Wed, Oct 30, 2024 at 12:21 AM Peter Xu <peterx@redhat.com> wrote:
> 
> > On Wed, Oct 23, 2024 at 10:09:51AM +0800, yong.huang@smartx.com wrote:
> > > From: Hyman Huang <yong.huang@smartx.com>
> > >
> > > KVM always returns 1 when userspace retrieves a dirty bitmap for
> > > the first time when KVM_DIRTY_LOG_INITIALLY_SET is enabled; in such
> > > scenario, the RAMBlock dirty sync of the initial iteration can be
> > > skipped.
> > >
> > > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > > ---
> > >  migration/cpu-throttle.c |  3 ++-
> > >  migration/ram.c          | 30 +++++++++++++++++++++++++++---
> > >  2 files changed, 29 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > > index 342681cdd4..06e3b1be78 100644
> > > --- a/migration/cpu-throttle.c
> > > +++ b/migration/cpu-throttle.c
> > > @@ -27,6 +27,7 @@
> > >  #include "hw/core/cpu.h"
> > >  #include "qemu/main-loop.h"
> > >  #include "sysemu/cpus.h"
> > > +#include "sysemu/kvm.h"
> > >  #include "cpu-throttle.h"
> > >  #include "migration.h"
> > >  #include "migration-stats.h"
> > > @@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> > >       * effect on guest performance, therefore omit it to avoid
> > >       * paying extra for the sync penalty.
> > >       */
> > > -    if (sync_cnt <= 1) {
> > > +    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
> > >          goto end;
> > >      }
> > >
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index d284f63854..b312ebd69d 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> > >  {
> > >      MigrationState *ms = migrate_get_current();
> > >      RAMBlock *block;
> > > -    unsigned long pages;
> > > +    unsigned long pages, clear_bmap_pages;
> > >      uint8_t shift;
> > >
> > >      /* Skip setting bitmap if there is no RAM */
> > > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> > >
> > >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > >              pages = block->max_length >> TARGET_PAGE_BITS;
> > > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> > >              /*
> > >               * The initial dirty bitmap for migration must be set with
> > all
> > >               * ones to make sure we'll migrate every guest RAM page to
> > > @@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
> > >                  block->file_bmap = bitmap_new(pages);
> > >              }
> > >              block->clear_bmap_shift = shift;
> > > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> > shift));
> > > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > > +            /*
> > > +             * Set the clear bitmap by default to enable dirty logging.
> > > +             *
> > > +             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty logging
> > > +             * will be enabled gradually in small chunks using
> > > +             * KVM_CLEAR_DIRTY_LOG
> > > +             */
> > > +            if (kvm_dirty_log_manual_enabled()) {
> > > +                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > > +            }
> >
> > Why it needs to be relevant to whether DIRTY_LOG is enabled?
> >
> > I wonder if we should always set clear_bmap to 1 unconditionally, as we
> > always set bmap to all 1s by default.
> >
> 
> OK, this works. We can drop it.
> 
> 
> >
> > Then we skip sync always during setup, dropping patch 1.
> >
> 
> IIUC, KVM initializes the slot->dirty_bitmap with 1 only when
> KVM_DIRTY_LOG_INITIALLY_SET is enabled, 0 otherwize.
> This means that if KVM does not support the
> KVM_DIRTY_LOG_INITIALLY_SET feature, userspace should
> do the first sync so that KVM could set the WP bit and clear
> the D-bit of the PTE.
> 
> Skipping first sync could handle this scenario?

Yes, the old kernels could be tricky (!CLEAR_LOG support), but I hope it's
also working.

The thing is log_start() should also protect all pages if that's the case.
For x86, that should corresponds to:

kvm_mmu_slot_apply_flags():

		/*
		 * Initially-all-set does not require write protecting any page,
		 * because they're all assumed to be dirty.
		 */
		if (kvm_dirty_log_manual_protect_and_init_set(kvm))
			return;

		if (READ_ONCE(eager_page_split))
			kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);

		if (kvm_x86_ops.cpu_dirty_log_size) {
			kvm_mmu_slot_leaf_clear_dirty(kvm, new);
			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
		} else {
			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
		}

In general, I think even if GET_DIRTY_LOG in setup() is omitted, all pages
should still be wr-protected already right after log_start().  Then follow
up log_clear()s will be noop, until the next sync which will reprotect
every page again.

Please double check.

Thanks,

-- 
Peter Xu


Re: [PATCH RFC 2/2] migration: Avoid doing RAMBlock dirty sync in the initial iteration
Posted by Yong Huang 3 weeks, 2 days ago
On Thu, Oct 31, 2024 at 3:43 AM Peter Xu <peterx@redhat.com> wrote:

> On Wed, Oct 30, 2024 at 10:09:38AM +0800, Yong Huang wrote:
> > On Wed, Oct 30, 2024 at 12:21 AM Peter Xu <peterx@redhat.com> wrote:
> >
> > > On Wed, Oct 23, 2024 at 10:09:51AM +0800, yong.huang@smartx.com wrote:
> > > > From: Hyman Huang <yong.huang@smartx.com>
> > > >
> > > > KVM always returns 1 when userspace retrieves a dirty bitmap for
> > > > the first time when KVM_DIRTY_LOG_INITIALLY_SET is enabled; in such
> > > > scenario, the RAMBlock dirty sync of the initial iteration can be
> > > > skipped.
> > > >
> > > > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > > > ---
> > > >  migration/cpu-throttle.c |  3 ++-
> > > >  migration/ram.c          | 30 +++++++++++++++++++++++++++---
> > > >  2 files changed, 29 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > > > index 342681cdd4..06e3b1be78 100644
> > > > --- a/migration/cpu-throttle.c
> > > > +++ b/migration/cpu-throttle.c
> > > > @@ -27,6 +27,7 @@
> > > >  #include "hw/core/cpu.h"
> > > >  #include "qemu/main-loop.h"
> > > >  #include "sysemu/cpus.h"
> > > > +#include "sysemu/kvm.h"
> > > >  #include "cpu-throttle.h"
> > > >  #include "migration.h"
> > > >  #include "migration-stats.h"
> > > > @@ -141,7 +142,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> *opaque)
> > > >       * effect on guest performance, therefore omit it to avoid
> > > >       * paying extra for the sync penalty.
> > > >       */
> > > > -    if (sync_cnt <= 1) {
> > > > +    if (sync_cnt <= (kvm_dirty_log_manual_enabled() ? 0 : 1)) {
> > > >          goto end;
> > > >      }
> > > >
> > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > index d284f63854..b312ebd69d 100644
> > > > --- a/migration/ram.c
> > > > +++ b/migration/ram.c
> > > > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> > > >  {
> > > >      MigrationState *ms = migrate_get_current();
> > > >      RAMBlock *block;
> > > > -    unsigned long pages;
> > > > +    unsigned long pages, clear_bmap_pages;
> > > >      uint8_t shift;
> > > >
> > > >      /* Skip setting bitmap if there is no RAM */
> > > > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> > > >
> > > >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > > >              pages = block->max_length >> TARGET_PAGE_BITS;
> > > > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> > > >              /*
> > > >               * The initial dirty bitmap for migration must be set
> with
> > > all
> > > >               * ones to make sure we'll migrate every guest RAM page
> to
> > > > @@ -2751,7 +2752,17 @@ static void ram_list_init_bitmaps(void)
> > > >                  block->file_bmap = bitmap_new(pages);
> > > >              }
> > > >              block->clear_bmap_shift = shift;
> > > > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> > > shift));
> > > > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > > > +            /*
> > > > +             * Set the clear bitmap by default to enable dirty
> logging.
> > > > +             *
> > > > +             * Note that with KVM_DIRTY_LOG_INITIALLY_SET, dirty
> logging
> > > > +             * will be enabled gradually in small chunks using
> > > > +             * KVM_CLEAR_DIRTY_LOG
> > > > +             */
> > > > +            if (kvm_dirty_log_manual_enabled()) {
> > > > +                bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > > > +            }
> > >
> > > Why it needs to be relevant to whether DIRTY_LOG is enabled?
> > >
> > > I wonder if we should always set clear_bmap to 1 unconditionally, as we
> > > always set bmap to all 1s by default.
> > >
> >
> > OK, this works. We can drop it.
> >
> >
> > >
> > > Then we skip sync always during setup, dropping patch 1.
> > >
> >
> > IIUC, KVM initializes the slot->dirty_bitmap with 1 only when
> > KVM_DIRTY_LOG_INITIALLY_SET is enabled, 0 otherwize.
> > This means that if KVM does not support the
> > KVM_DIRTY_LOG_INITIALLY_SET feature, userspace should
> > do the first sync so that KVM could set the WP bit and clear
> > the D-bit of the PTE.
> >
> > Skipping first sync could handle this scenario?
>
> Yes, the old kernels could be tricky (!CLEAR_LOG support), but I hope it's
> also working.
>
> The thing is log_start() should also protect all pages if that's the case.
> For x86, that should corresponds to:
>
> kvm_mmu_slot_apply_flags():
>
>                 /*
>                  * Initially-all-set does not require write protecting any
> page,
>                  * because they're all assumed to be dirty.
>                  */
>                 if (kvm_dirty_log_manual_protect_and_init_set(kvm))
>                         return;
>
>                 if (READ_ONCE(eager_page_split))
>                         kvm_mmu_slot_try_split_huge_pages(kvm, new,
> PG_LEVEL_4K);
>
>                 if (kvm_x86_ops.cpu_dirty_log_size) {
>                         kvm_mmu_slot_leaf_clear_dirty(kvm, new);
>                         kvm_mmu_slot_remove_write_access(kvm, new,
> PG_LEVEL_2M);
>                 } else {
>                         kvm_mmu_slot_remove_write_access(kvm, new,
> PG_LEVEL_4K);
>                 }
>

This path does the write protecting indeed. Thanks for pointing this out.


>
> In general, I think even if GET_DIRTY_LOG in setup() is omitted, all pages
> should still be wr-protected already right after log_start().  Then follow
> up log_clear()s will be noop, until the next sync which will reprotect
> every page again.
>

Agree. I'll drop the patch 1 and simplify the patch 2 in the next version.

Thanks for the comments.


>
> Please double check.
>
> Thanks,
>
> --
> Peter Xu
>
>
Yong

-- 
Best regards