[PATCH] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by yong.huang@smartx.com 2 weeks, 2 days ago
From: Hyman Huang <yong.huang@smartx.com>

The first iteration's RAMBlock dirty sync can be omitted because QEMU
always initializes the RAMBlock's bmap to all 1s by default.
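
For reference, the initialization in ram_list_init_bitmaps() that makes the
first sync redundant is roughly the following (existing code, paraphrased):

    block->bmap = bitmap_new(pages);
    /* start with all 1s: every page is treated as dirty and sent at least once */
    bitmap_set(block->bmap, 0, pages);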

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 migration/cpu-throttle.c |  2 +-
 migration/ram.c          | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
index 5179019e33..674dc2004e 100644
--- a/migration/cpu-throttle.c
+++ b/migration/cpu-throttle.c
@@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
      * effect on guest performance, therefore omit it to avoid
      * paying extra for the sync penalty.
      */
-    if (sync_cnt <= 1) {
+    if (!sync_cnt) {
         goto end;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index 05ff9eb328..a0123eb93e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
 {
     MigrationState *ms = migrate_get_current();
     RAMBlock *block;
-    unsigned long pages;
+    unsigned long pages, clear_bmap_pages;
     uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
@@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
 
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            clear_bmap_pages = clear_bmap_size(pages, shift);
             /*
              * The initial dirty bitmap for migration must be set with all
              * ones to make sure we'll migrate every guest RAM page to
@@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
                 block->file_bmap = bitmap_new(pages);
             }
             block->clear_bmap_shift = shift;
-            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
+            block->clear_bmap = bitmap_new(clear_bmap_pages);
+            /*
+             * Set clear_bmap to 1 unconditionally, as we always set bmap
+             * to all 1s by default.
+             */
+            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
         }
     }
 }
@@ -2771,6 +2777,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
+    Error *local_err = NULL;
     bool ret = true;
 
     qemu_mutex_lock_ramlist();
@@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
             if (!ret) {
                 goto out_unlock;
             }
-            migration_bitmap_sync_precopy(false);
+            /*
+             * Bypass the RAMBlock dirty sync and still publish the
+             * notification.
+             */
+            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
+                error_report_err(local_err);
+            }
         }
     }
 out_unlock:
-- 
2.27.0
Re: [PATCH] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 2 weeks, 2 days ago
On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> From: Hyman Huang <yong.huang@smartx.com>
> 
> The first iteration's RAMBlock dirty sync can be omitted because QEMU
> always initializes the RAMBlock's bmap to all 1s by default.
> 
> Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> ---
>  migration/cpu-throttle.c |  2 +-
>  migration/ram.c          | 19 ++++++++++++++++---
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> index 5179019e33..674dc2004e 100644
> --- a/migration/cpu-throttle.c
> +++ b/migration/cpu-throttle.c
> @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
>       * effect on guest performance, therefore omit it to avoid
>       * paying extra for the sync penalty.
>       */
> -    if (sync_cnt <= 1) {
> +    if (!sync_cnt) {
>          goto end;
>      }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 05ff9eb328..a0123eb93e 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>  {
>      MigrationState *ms = migrate_get_current();
>      RAMBlock *block;
> -    unsigned long pages;
> +    unsigned long pages, clear_bmap_pages;
>      uint8_t shift;
>  
>      /* Skip setting bitmap if there is no RAM */
> @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>  
>          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
> +            clear_bmap_pages = clear_bmap_size(pages, shift);
>              /*
>               * The initial dirty bitmap for migration must be set with all
>               * ones to make sure we'll migrate every guest RAM page to
> @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>                  block->file_bmap = bitmap_new(pages);
>              }
>              block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set clear_bmap to 1 unconditionally, as we always set bmap
> +             * to all 1s by default.
> +             */
> +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
>          }
>      }
>  }
> @@ -2771,6 +2777,7 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>  
>  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>  {
> +    Error *local_err = NULL;
>      bool ret = true;
>  
>      qemu_mutex_lock_ramlist();
> @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>              if (!ret) {
>                  goto out_unlock;
>              }
> -            migration_bitmap_sync_precopy(false);
> +            /*
> +             * Bypass the RAMBlock dirty sync and still publish the
> +             * notification.

Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?

> +             */
> +            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
> +                error_report_err(local_err);
> +            }
>          }
>      }
>  out_unlock:
> -- 
> 2.27.0
> 

-- 
Peter Xu
Re: [PATCH] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 2 weeks, 1 day ago
On Fri, Nov 8, 2024 at 12:28 AM Peter Xu <peterx@redhat.com> wrote:

> On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> > From: Hyman Huang <yong.huang@smartx.com>
> >
> > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > always initializes the RAMBlock's bmap to all 1s by default.
> >
> > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > ---
> >  migration/cpu-throttle.c |  2 +-
> >  migration/ram.c          | 19 ++++++++++++++++---
> >  2 files changed, 17 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > index 5179019e33..674dc2004e 100644
> > --- a/migration/cpu-throttle.c
> > +++ b/migration/cpu-throttle.c
> > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> >       * effect on guest performance, therefore omit it to avoid
> >       * paying extra for the sync penalty.
> >       */
> > -    if (sync_cnt <= 1) {
> > +    if (!sync_cnt) {
> >          goto end;
> >      }
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 05ff9eb328..a0123eb93e 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >  {
> >      MigrationState *ms = migrate_get_current();
> >      RAMBlock *block;
> > -    unsigned long pages;
> > +    unsigned long pages, clear_bmap_pages;
> >      uint8_t shift;
> >
> >      /* Skip setting bitmap if there is no RAM */
> > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >
> >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >              pages = block->max_length >> TARGET_PAGE_BITS;
> > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >              /*
> >               * The initial dirty bitmap for migration must be set with
> all
> >               * ones to make sure we'll migrate every guest RAM page to
> > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >                  block->file_bmap = bitmap_new(pages);
> >              }
> >              block->clear_bmap_shift = shift;
> > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> shift));
> > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > +            /*
> > +             * Set clear_bmap to 1 unconditionally, as we always set
> bmap
> > +             * to all 1s by default.
> > +             */
> > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >          }
> >      }
> >  }
> > @@ -2771,6 +2777,7 @@ static void
> migration_bitmap_clear_discarded_pages(RAMState *rs)
> >
> >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> >  {
> > +    Error *local_err = NULL;
> >      bool ret = true;
> >
> >      qemu_mutex_lock_ramlist();
> > @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> **errp)
> >              if (!ret) {
> >                  goto out_unlock;
> >              }
> > -            migration_bitmap_sync_precopy(false);
> > +            /*
> > +             * Bypass the RAMBlock dirty sync and still publish the
> > +             * notification.
>
> Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?
>

Indeed, logically, we should not send the notification.

Some features, like VIRTIO_BALLOON_F_FREE_PAGE_HINT, use this notification
to indirectly detect whether the RAMBlock's bmap has been updated, which
allows the free page optimization to begin clearing the parts of the bitmap
that contain free pages.

virtio_balloon_free_page_hint_notify
......
    switch (pnd->reason) {
    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
        virtio_balloon_free_page_stop(dev);
        break;
    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
        if (vdev->vm_running) {
            virtio_balloon_free_page_start(dev);
            break;
        }

The free page optimization may miss its first window to run if we don't
send out a notification after starting the migration and initializing the
bmap with all 1s.
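
For context, that notification is normally published from
migration_bitmap_sync_precopy(), which (paraphrasing, details elided) does:

    static void migration_bitmap_sync_precopy(bool last_stage)
    {
        ...
        if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
            error_report_err(local_err);
        }

        migration_bitmap_sync(ram_state, last_stage); /* the RAMBlock dirty sync */

        if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
            error_report_err(local_err);
        }
        ...
    }

So skipping the sync in ram_init_bitmaps() also skips that AFTER notification
unless we publish it by hand, which is what the hunk above does.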

Can we change this old behavior of the optimization?


> > +             */
> > +            if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC,
> &local_err)) {
> > +                error_report_err(local_err);
> > +            }
> >          }
> >      }
> >  out_unlock:
> > --
> > 2.27.0
> >
>
> --
> Peter Xu
>
>

-- 
Best regards
Re: [PATCH] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 2 weeks, 1 day ago
On Fri, Nov 08, 2024 at 02:03:47PM +0800, Yong Huang wrote:
> On Fri, Nov 8, 2024 at 12:28 AM Peter Xu <peterx@redhat.com> wrote:
> 
> > On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> > > From: Hyman Huang <yong.huang@smartx.com>
> > >
> > > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > > always initializes the RAMBlock's bmap to all 1s by default.
> > >
> > > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > > ---
> > >  migration/cpu-throttle.c |  2 +-
> > >  migration/ram.c          | 19 ++++++++++++++++---
> > >  2 files changed, 17 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > > index 5179019e33..674dc2004e 100644
> > > --- a/migration/cpu-throttle.c
> > > +++ b/migration/cpu-throttle.c
> > > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> > >       * effect on guest performance, therefore omit it to avoid
> > >       * paying extra for the sync penalty.
> > >       */
> > > -    if (sync_cnt <= 1) {
> > > +    if (!sync_cnt) {
> > >          goto end;
> > >      }
> > >
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index 05ff9eb328..a0123eb93e 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> > >  {
> > >      MigrationState *ms = migrate_get_current();
> > >      RAMBlock *block;
> > > -    unsigned long pages;
> > > +    unsigned long pages, clear_bmap_pages;
> > >      uint8_t shift;
> > >
> > >      /* Skip setting bitmap if there is no RAM */
> > > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> > >
> > >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > >              pages = block->max_length >> TARGET_PAGE_BITS;
> > > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> > >              /*
> > >               * The initial dirty bitmap for migration must be set with
> > all
> > >               * ones to make sure we'll migrate every guest RAM page to
> > > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> > >                  block->file_bmap = bitmap_new(pages);
> > >              }
> > >              block->clear_bmap_shift = shift;
> > > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> > shift));
> > > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > > +            /*
> > > +             * Set clear_bmap to 1 unconditionally, as we always set
> > bmap
> > > +             * to all 1s by default.
> > > +             */
> > > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > >          }
> > >      }
> > >  }
> > > @@ -2771,6 +2777,7 @@ static void
> > migration_bitmap_clear_discarded_pages(RAMState *rs)
> > >
> > >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> > >  {
> > > +    Error *local_err = NULL;
> > >      bool ret = true;
> > >
> > >      qemu_mutex_lock_ramlist();
> > > @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> > **errp)
> > >              if (!ret) {
> > >                  goto out_unlock;
> > >              }
> > > -            migration_bitmap_sync_precopy(false);
> > > +            /*
> > > +             * Bypass the RAMBlock dirty sync and still publish the
> > > +             * notification.
> >
> > Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't happen?
> >
> 
> Indeed, logically, we should not make the notification.
> 
> Some features, like VIRTIO_BALLOON_F_FREE_PAGE_HINT, use this notification
> to indirectly detect whether the RAMBlock's bmap has been updated. This
> allows the
> free page optimization to begin clearing parts of the bitmap that contain
> free pages.
> 
> virtio_balloon_free_page_hint_notify
> ......
>     switch (pnd->reason) {
>     case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
>         virtio_balloon_free_page_stop(dev);
>         break;
>     case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
>         if (vdev->vm_running) {
>             virtio_balloon_free_page_start(dev);
>             break;
>         }
> 
> The free page optimization may miss the first time window to execute if we
> don't
> send out a notification after starting the migration and initializing the
> bmap with all 1s.
> 
> May we change the old behavior of optimization?

I see.

It looks like an abuse to me so far to use AFTER_BITMAP_SYNC as the start of
free page hinting.  There's no guarantee that a sync is needed when starting
migration.

I think we may want a pre-requisite patch to enable free page hinting
during PRECOPY_NOTIFY_SETUP too, like PRECOPY_NOTIFY_AFTER_BITMAP_SYNC.
That patch (if you agree) will need to copy David Hildenbrand and Wei Wang
(original author).
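
Just to illustrate (a sketch only, not tested; the exact handling is for the
balloon maintainers to decide), the pre-requisite change could be something
like this in virtio_balloon_free_page_hint_notify():

         switch (pnd->reason) {
    +    case PRECOPY_NOTIFY_SETUP:
    +        /*
    +         * The migration bitmap starts as all 1s, so hinting can begin
    +         * here instead of waiting for the first bitmap sync.
    +         */
    +        if (vdev->vm_running) {
    +            virtio_balloon_free_page_start(dev);
    +        }
    +        break;
         case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
             virtio_balloon_free_page_stop(dev);
             break;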

Thanks,

-- 
Peter Xu


Re: [PATCH] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 2 weeks ago
On Fri, Nov 8, 2024 at 9:50 PM Peter Xu <peterx@redhat.com> wrote:

> On Fri, Nov 08, 2024 at 02:03:47PM +0800, Yong Huang wrote:
> > On Fri, Nov 8, 2024 at 12:28 AM Peter Xu <peterx@redhat.com> wrote:
> >
> > > On Thu, Nov 07, 2024 at 05:56:50PM +0800, yong.huang@smartx.com wrote:
> > > > From: Hyman Huang <yong.huang@smartx.com>
> > > >
> > > > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > > > always initializes the RAMBlock's bmap to all 1s by default.
> > > >
> > > > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > > > ---
> > > >  migration/cpu-throttle.c |  2 +-
> > > >  migration/ram.c          | 19 ++++++++++++++++---
> > > >  2 files changed, 17 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > > > index 5179019e33..674dc2004e 100644
> > > > --- a/migration/cpu-throttle.c
> > > > +++ b/migration/cpu-throttle.c
> > > > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> *opaque)
> > > >       * effect on guest performance, therefore omit it to avoid
> > > >       * paying extra for the sync penalty.
> > > >       */
> > > > -    if (sync_cnt <= 1) {
> > > > +    if (!sync_cnt) {
> > > >          goto end;
> > > >      }
> > > >
> > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > index 05ff9eb328..a0123eb93e 100644
> > > > --- a/migration/ram.c
> > > > +++ b/migration/ram.c
> > > > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> > > >  {
> > > >      MigrationState *ms = migrate_get_current();
> > > >      RAMBlock *block;
> > > > -    unsigned long pages;
> > > > +    unsigned long pages, clear_bmap_pages;
> > > >      uint8_t shift;
> > > >
> > > >      /* Skip setting bitmap if there is no RAM */
> > > > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> > > >
> > > >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > > >              pages = block->max_length >> TARGET_PAGE_BITS;
> > > > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> > > >              /*
> > > >               * The initial dirty bitmap for migration must be set
> with
> > > all
> > > >               * ones to make sure we'll migrate every guest RAM page
> to
> > > > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> > > >                  block->file_bmap = bitmap_new(pages);
> > > >              }
> > > >              block->clear_bmap_shift = shift;
> > > > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> > > shift));
> > > > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > > > +            /*
> > > > +             * Set clear_bmap to 1 unconditionally, as we always set
> > > bmap
> > > > +             * to all 1s by default.
> > > > +             */
> > > > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> > > >          }
> > > >      }
> > > >  }
> > > > @@ -2771,6 +2777,7 @@ static void
> > > migration_bitmap_clear_discarded_pages(RAMState *rs)
> > > >
> > > >  static bool ram_init_bitmaps(RAMState *rs, Error **errp)
> > > >  {
> > > > +    Error *local_err = NULL;
> > > >      bool ret = true;
> > > >
> > > >      qemu_mutex_lock_ramlist();
> > > > @@ -2783,7 +2790,13 @@ static bool ram_init_bitmaps(RAMState *rs,
> Error
> > > **errp)
> > > >              if (!ret) {
> > > >                  goto out_unlock;
> > > >              }
> > > > -            migration_bitmap_sync_precopy(false);
> > > > +            /*
> > > > +             * Bypass the RAMBlock dirty sync and still publish the
> > > > +             * notification.
> > >
> > > Hmm.. Why should QEMU notify AFTER_BITMAP_SYNC if the sync didn't
> happen?
> > >
> >
> > Indeed, logically, we should not send the notification.
> >
> > Some features, like VIRTIO_BALLOON_F_FREE_PAGE_HINT, use this notification
> > to indirectly detect whether the RAMBlock's bmap has been updated, which
> > allows the free page optimization to begin clearing the parts of the bitmap
> > that contain free pages.
> >
> > virtio_balloon_free_page_hint_notify
> > ......
> >     switch (pnd->reason) {
> >     case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
> >         virtio_balloon_free_page_stop(dev);
> >         break;
> >     case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
> >         if (vdev->vm_running) {
> >             virtio_balloon_free_page_start(dev);
> >             break;
> >         }
> >
> > The free page optimization may miss its first window to run if we don't
> > send out a notification after starting the migration and initializing the
> > bmap with all 1s.
> >
> > Can we change this old behavior of the optimization?
>
> I see.
>
> It looks like an abuse to me so far to use AFTER_BITMAP_SYNC as the start
> of free page hinting.  There's no guarantee that a sync is needed when
> starting migration.
>
> I think we may want a pre-requisite patch to enable free page hinting
> during PRECOPY_NOTIFY_SETUP too, like PRECOPY_NOTIFY_AFTER_BITMAP_SYNC.
> That patch (if you agree) will need to copy David Hildenbrand and Wei Wang
> (original author).
>

Agreed, I'll try it in the next version.


>
> Thanks,
>
> --
> Peter Xu
>
>
Thanks,
Yong

-- 
Best regards