[PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration

Hyman Huang posted 2 patches 2 weeks ago
[PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Hyman Huang 2 weeks ago
The first iteration's RAMBlock dirty sync can be omitted because QEMU
always initializes the RAMBlock's bmap to all 1s by default.

Signed-off-by: Hyman Huang <yong.huang@smartx.com>
---
 migration/cpu-throttle.c |  2 +-
 migration/ram.c          | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
index 5179019e33..674dc2004e 100644
--- a/migration/cpu-throttle.c
+++ b/migration/cpu-throttle.c
@@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
      * effect on guest performance, therefore omit it to avoid
      * paying extra for the sync penalty.
      */
-    if (sync_cnt <= 1) {
+    if (!sync_cnt) {
         goto end;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index 05ff9eb328..571dba10b7 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
 {
     MigrationState *ms = migrate_get_current();
     RAMBlock *block;
-    unsigned long pages;
+    unsigned long pages, clear_bmap_pages;
     uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
@@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
 
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            clear_bmap_pages = clear_bmap_size(pages, shift);
             /*
              * The initial dirty bitmap for migration must be set with all
              * ones to make sure we'll migrate every guest RAM page to
@@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
                 block->file_bmap = bitmap_new(pages);
             }
             block->clear_bmap_shift = shift;
-            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
+            block->clear_bmap = bitmap_new(clear_bmap_pages);
+            /*
+             * Set clear_bmap to 1 unconditionally, as we always set bmap
+             * to all 1s by default.
+             */
+            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
         }
     }
 }
@@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
             if (!ret) {
                 goto out_unlock;
             }
-            migration_bitmap_sync_precopy(false);
         }
     }
 out_unlock:
-- 
2.39.1
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 5 days ago
On 09.11.24 05:59, Hyman Huang wrote:
> The first iteration's RAMBlock dirty sync can be omitted because QEMU
> always initializes the RAMBlock's bmap to all 1s by default.
> 
> Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> ---
>   migration/cpu-throttle.c |  2 +-
>   migration/ram.c          | 11 ++++++++---
>   2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> index 5179019e33..674dc2004e 100644
> --- a/migration/cpu-throttle.c
> +++ b/migration/cpu-throttle.c
> @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
>        * effect on guest performance, therefore omit it to avoid
>        * paying extra for the sync penalty.
>        */
> -    if (sync_cnt <= 1) {
> +    if (!sync_cnt) {
>           goto end;
>       }
>   
> diff --git a/migration/ram.c b/migration/ram.c
> index 05ff9eb328..571dba10b7 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>   {
>       MigrationState *ms = migrate_get_current();
>       RAMBlock *block;
> -    unsigned long pages;
> +    unsigned long pages, clear_bmap_pages;
>       uint8_t shift;
>   
>       /* Skip setting bitmap if there is no RAM */
> @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>   
>           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>               pages = block->max_length >> TARGET_PAGE_BITS;
> +            clear_bmap_pages = clear_bmap_size(pages, shift);
>               /*
>                * The initial dirty bitmap for migration must be set with all
>                * ones to make sure we'll migrate every guest RAM page to
> @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>                   block->file_bmap = bitmap_new(pages);
>               }
>               block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set clear_bmap to 1 unconditionally, as we always set bmap
> +             * to all 1s by default.
> +             */
> +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
>           }
>       }
>   }
> @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs, Error **errp)
>               if (!ret) {
>                   goto out_unlock;
>               }
> -            migration_bitmap_sync_precopy(false);
>           }
>       }
>   out_unlock:


For virtio-mem, we rely on the migration_bitmap_clear_discarded_pages() 
call to clear all bits that correspond to unplugged memory ranges.

If we omit the sync, we can likely have bits of unplugged ranges still
set to "1", meaning we would try to migrate them later, although we shouldn't?

Or is that handled differently?

-- 
Cheers,

David / dhildenb
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 1 week, 5 days ago
On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand <david@redhat.com> wrote:

> On 09.11.24 05:59, Hyman Huang wrote:
> > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > always initializes the RAMBlock's bmap to all 1s by default.
> >
> > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > ---
> >   migration/cpu-throttle.c |  2 +-
> >   migration/ram.c          | 11 ++++++++---
> >   2 files changed, 9 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> > index 5179019e33..674dc2004e 100644
> > --- a/migration/cpu-throttle.c
> > +++ b/migration/cpu-throttle.c
> > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void *opaque)
> >        * effect on guest performance, therefore omit it to avoid
> >        * paying extra for the sync penalty.
> >        */
> > -    if (sync_cnt <= 1) {
> > +    if (!sync_cnt) {
> >           goto end;
> >       }
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 05ff9eb328..571dba10b7 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >   {
> >       MigrationState *ms = migrate_get_current();
> >       RAMBlock *block;
> > -    unsigned long pages;
> > +    unsigned long pages, clear_bmap_pages;
> >       uint8_t shift;
> >
> >       /* Skip setting bitmap if there is no RAM */
> > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >
> >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >               pages = block->max_length >> TARGET_PAGE_BITS;
> > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >               /*
> >                * The initial dirty bitmap for migration must be set with
> all
> >                * ones to make sure we'll migrate every guest RAM page to
> > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >                   block->file_bmap = bitmap_new(pages);
> >               }
> >               block->clear_bmap_shift = shift;
> > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> shift));
> > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > +            /*
> > +             * Set clear_bmap to 1 unconditionally, as we always set
> bmap
> > +             * to all 1s by default.
> > +             */
> > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >           }
> >       }
> >   }
> > @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> **errp)
> >               if (!ret) {
> >                   goto out_unlock;
> >               }
> > -            migration_bitmap_sync_precopy(false);
> >           }
> >       }
> >   out_unlock:
>
>
> For virtio-mem, we rely on the migration_bitmap_clear_discarded_pages()
> call to clear all bits that correspond to unplugged memory ranges.


> If we ommit the sync, we can likely have bits of unplugged ranges still
> set to "1", meaning we would try migrate them later, although we shouldn't?
>


IIUC, migration_bitmap_clear_discarded_pages() is still called at the end of
ram_init_bitmaps() regardless of whether we omit the first sync.

The PRECOPY_NOTIFY_SETUP notification is sent out at the end of
ram_save_setup() (after ram_list_init_bitmaps()); by the time
virtio_balloon_free_page_start() is called,
migration_bitmap_clear_discarded_pages() has already completed and the
bmap has been correctly cleared.

ram_save_setup
    -> ram_list_init_bitmaps
        -> migration_bitmap_clear_discarded_pages
    -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);

You can double check it.


>
> Or is that handled differently?
>
> --
> Cheers,
>
> David / dhildenb
>
>
Thanks for the comments,

Yong

-- 
Best regards
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 5 days ago
On 11.11.24 11:08, Yong Huang wrote:
> 
> 
> On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand <david@redhat.com 
> <mailto:david@redhat.com>> wrote:
> 
>     On 09.11.24 05:59, Hyman Huang wrote:
>      > The first iteration's RAMBlock dirty sync can be omitted because QEMU
>      > always initializes the RAMBlock's bmap to all 1s by default.
>      >
>      > Signed-off-by: Hyman Huang <yong.huang@smartx.com
>     <mailto:yong.huang@smartx.com>>
>      > ---
>      >   migration/cpu-throttle.c |  2 +-
>      >   migration/ram.c          | 11 ++++++++---
>      >   2 files changed, 9 insertions(+), 4 deletions(-)
>      >
>      > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
>      > index 5179019e33..674dc2004e 100644
>      > --- a/migration/cpu-throttle.c
>      > +++ b/migration/cpu-throttle.c
>      > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
>     *opaque)
>      >        * effect on guest performance, therefore omit it to avoid
>      >        * paying extra for the sync penalty.
>      >        */
>      > -    if (sync_cnt <= 1) {
>      > +    if (!sync_cnt) {
>      >           goto end;
>      >       }
>      >
>      > diff --git a/migration/ram.c b/migration/ram.c
>      > index 05ff9eb328..571dba10b7 100644
>      > --- a/migration/ram.c
>      > +++ b/migration/ram.c
>      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>      >   {
>      >       MigrationState *ms = migrate_get_current();
>      >       RAMBlock *block;
>      > -    unsigned long pages;
>      > +    unsigned long pages, clear_bmap_pages;
>      >       uint8_t shift;
>      >
>      >       /* Skip setting bitmap if there is no RAM */
>      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>      >
>      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>      >               pages = block->max_length >> TARGET_PAGE_BITS;
>      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
>      >               /*
>      >                * The initial dirty bitmap for migration must be
>     set with all
>      >                * ones to make sure we'll migrate every guest RAM
>     page to
>      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>      >                   block->file_bmap = bitmap_new(pages);
>      >               }
>      >               block->clear_bmap_shift = shift;
>      > -            block->clear_bmap =
>     bitmap_new(clear_bmap_size(pages, shift));
>      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
>      > +            /*
>      > +             * Set clear_bmap to 1 unconditionally, as we always
>     set bmap
>      > +             * to all 1s by default.
>      > +             */
>      > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
>      >           }
>      >       }
>      >   }
>      > @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs,
>     Error **errp)
>      >               if (!ret) {
>      >                   goto out_unlock;
>      >               }
>      > -            migration_bitmap_sync_precopy(false);
>      >           }
>      >       }
>      >   out_unlock:
> 
> 
>     For virtio-mem, we rely on the migration_bitmap_clear_discarded_pages()
>     call to clear all bits that correspond to unplugged memory ranges. 
> 
> 
>     If we ommit the sync, we can likely have bits of unplugged ranges still
>     set to "1", meaning we would try migrate them later, although we
>     shouldn't?
> 
> 
> 
> IIUC, migration_bitmap_clear_discarded_pages() is still called at the end of
> ram_init_bitmaps() regardless of whether we omit the first sync.
>
> The PRECOPY_NOTIFY_SETUP notification is sent out at the end of
> ram_save_setup() (after ram_list_init_bitmaps()); by the time
> virtio_balloon_free_page_start() is called,
> migration_bitmap_clear_discarded_pages() has already completed and the
> bmap has been correctly cleared.
> 
> ram_save_setup
>     -> ram_list_init_bitmaps
>         -> migration_bitmap_clear_discarded_pages
>      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
> 
> You can double check it.

That's not my concern, let me clarify :)


Assume in KVM the bitmap is all 1s ("everything dirty").

In current code, we will sync the bitmap once (IIRC, clearing any dirty 
bits from KVM).

Then we call migration_bitmap_clear_discarded_pages() to clear all 
"discarded" pages that we shouldn't touch.

When we do the next bitmap sync, we will not get a "1" for discarded 
ranges, and we will never try migrating discarded ranges.


With your patch, we're omitting the first sync. Could we possibly get 
discarded ranges reported from KVM as dirty during the "now first" sync 
*after* the migration_bitmap_clear_discarded_pages() call, and try 
migrating discarded ranges?

I did not dive deep into the code, maybe 
migration_bitmap_clear_discarded_pages() ends up clearing the bits in 
KVM, but I recall that there was something special about the first 
bitmap sync.

-- 
Cheers,

David / dhildenb


Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 1 week, 5 days ago
On Mon, Nov 11, 2024 at 6:42 PM David Hildenbrand <david@redhat.com> wrote:

> On 11.11.24 11:08, Yong Huang wrote:
> >
> >
> > On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand <david@redhat.com
> > <mailto:david@redhat.com>> wrote:
> >
> >     On 09.11.24 05:59, Hyman Huang wrote:
> >      > The first iteration's RAMBlock dirty sync can be omitted because
> QEMU
> >      > always initializes the RAMBlock's bmap to all 1s by default.
> >      >
> >      > Signed-off-by: Hyman Huang <yong.huang@smartx.com
> >     <mailto:yong.huang@smartx.com>>
> >      > ---
> >      >   migration/cpu-throttle.c |  2 +-
> >      >   migration/ram.c          | 11 ++++++++---
> >      >   2 files changed, 9 insertions(+), 4 deletions(-)
> >      >
> >      > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> >      > index 5179019e33..674dc2004e 100644
> >      > --- a/migration/cpu-throttle.c
> >      > +++ b/migration/cpu-throttle.c
> >      > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> >     *opaque)
> >      >        * effect on guest performance, therefore omit it to avoid
> >      >        * paying extra for the sync penalty.
> >      >        */
> >      > -    if (sync_cnt <= 1) {
> >      > +    if (!sync_cnt) {
> >      >           goto end;
> >      >       }
> >      >
> >      > diff --git a/migration/ram.c b/migration/ram.c
> >      > index 05ff9eb328..571dba10b7 100644
> >      > --- a/migration/ram.c
> >      > +++ b/migration/ram.c
> >      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >      >   {
> >      >       MigrationState *ms = migrate_get_current();
> >      >       RAMBlock *block;
> >      > -    unsigned long pages;
> >      > +    unsigned long pages, clear_bmap_pages;
> >      >       uint8_t shift;
> >      >
> >      >       /* Skip setting bitmap if there is no RAM */
> >      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >      >
> >      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >      >               pages = block->max_length >> TARGET_PAGE_BITS;
> >      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >      >               /*
> >      >                * The initial dirty bitmap for migration must be
> >     set with all
> >      >                * ones to make sure we'll migrate every guest RAM
> >     page to
> >      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >      >                   block->file_bmap = bitmap_new(pages);
> >      >               }
> >      >               block->clear_bmap_shift = shift;
> >      > -            block->clear_bmap =
> >     bitmap_new(clear_bmap_size(pages, shift));
> >      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> >      > +            /*
> >      > +             * Set clear_bmap to 1 unconditionally, as we always
> >     set bmap
> >      > +             * to all 1s by default.
> >      > +             */
> >      > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >      >           }
> >      >       }
> >      >   }
> >      > @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs,
> >     Error **errp)
> >      >               if (!ret) {
> >      >                   goto out_unlock;
> >      >               }
> >      > -            migration_bitmap_sync_precopy(false);
> >      >           }
> >      >       }
> >      >   out_unlock:
> >
> >
> >     For virtio-mem, we rely on the
> migration_bitmap_clear_discarded_pages()
> >     call to clear all bits that correspond to unplugged memory ranges.
> >
> >
> >     If we ommit the sync, we can likely have bits of unplugged ranges
> still
> >     set to "1", meaning we would try migrate them later, although we
> >     shouldn't?
> >
> >
> >
> > IIUC, migration_bitmap_clear_discarded_pagesis still called at the end of
> > ram_init_bitmaps no matter if we omit the first sync.
>  > > PRECOPY_NOTIFY_SETUPnotification is sent out at the end of
> > ram_save_setup(ram_list_init_bitmaps),when
> > virtio_balloon_free_page_start() is
> > called,migration_bitmap_clear_discarded_pages() has already completed
> > and the
> > bmap has been correctly cleared.
> >
> > ram_save_setup
> >     -> ram_list_init_bitmaps
> >         -> migration_bitmap_clear_discarded_pages
> >      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
> >
> > You can double check it.
>
> That's not my concern, let me clarify :)
>
>
> Assume in KVM the bitmap is all 1s ("everything dirty").
>
> In current code, we will sync the bitmap once (IIRC, clearing any dirty
> bits from KVM).
>

With the old logic, write-protecting and clearing dirty bits are both done
in the KVM_GET_DIRTY_LOG API, while with the
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 feature enabled, clearing
dirty bits is postponed to the KVM_CLEAR_DIRTY_LOG API, which
is called right before page sending in the migration thread in QEMU.
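
For reference, a rough userspace sketch of the two flows (illustrative
only: the structs and ioctls are the kernel's KVM dirty-log uAPI, but the
fd/slot plumbing is made up and error handling is omitted):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Old logic: KVM_GET_DIRTY_LOG fetches the bitmap and also
     * write-protects/clears the dirty state in one step. */
    static void sync_dirty_log_old(int vm_fd, __u32 slot, void *bitmap)
    {
        struct kvm_dirty_log log = { .slot = slot, .dirty_bitmap = bitmap };

        ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
    }

    /* With KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 enabled: GET only fetches;
     * clearing is deferred to KVM_CLEAR_DIRTY_LOG, issued for the pages
     * that are about to be sent. */
    static void clear_dirty_log_manual(int vm_fd, __u32 slot, __u64 first_page,
                                       __u32 num_pages, void *bitmap)
    {
        struct kvm_clear_dirty_log clear = {
            .slot = slot,
            .first_page = first_page,
            .num_pages = num_pages,
            .dirty_bitmap = bitmap,
        };

        ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
    }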


>
> Then we call migration_bitmap_clear_discarded_pages() to clear all
> "discarded" pages that we shouldn't touch.
>
> When we do the next bitmap sync, we will not get a "1" for discarded
> ranges, and we will never try migrating discarded ranges.
>
>
> With your patch, we're omitting the first sync. Could we possibly get
> discarded ranges reported from KVM as dirty during the "now first" sync
> *after* the migration_bitmap_clear_discarded_pages() call, and try
> migrating discarded ranges?
>
> I did not dive deep into the code, maybe
> migration_bitmap_clear_discarded_pages() ends up clearing the bits in
>

Yes, migration_bitmap_clear_discarded_pages() clears the bits in
KVM via:
ramblock_dirty_bitmap_clear_discarded_pages
    -> dirty_bitmap_clear_section
        -> migration_clear_memory_region_dirty_bitmap_range
            -> migration_clear_memory_region_dirty_bitmap
                -> memory_region_clear_dirty_bitmap
                    -> KVM_CLEAR_DIRTY_LOG ioctl


> KVM, but I recall that there was something special about the first
> bitmap sync.
>
> --
> Cheers,
>
> David / dhildenb
>
>

-- 
Best regards
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 4 days ago
On 11.11.24 12:37, Yong Huang wrote:
> 
> 
> On Mon, Nov 11, 2024 at 6:42 PM David Hildenbrand <david@redhat.com 
> <mailto:david@redhat.com>> wrote:
> 
>     On 11.11.24 11:08, Yong Huang wrote:
>      >
>      >
>      > On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand
>     <david@redhat.com <mailto:david@redhat.com>
>      > <mailto:david@redhat.com <mailto:david@redhat.com>>> wrote:
>      >
>      >     On 09.11.24 05:59, Hyman Huang wrote:
>      >      > The first iteration's RAMBlock dirty sync can be omitted
>     because QEMU
>      >      > always initializes the RAMBlock's bmap to all 1s by default.
>      >      >
>      >      > Signed-off-by: Hyman Huang <yong.huang@smartx.com
>     <mailto:yong.huang@smartx.com>
>      >     <mailto:yong.huang@smartx.com <mailto:yong.huang@smartx.com>>>
>      >      > ---
>      >      >   migration/cpu-throttle.c |  2 +-
>      >      >   migration/ram.c          | 11 ++++++++---
>      >      >   2 files changed, 9 insertions(+), 4 deletions(-)
>      >      >
>      >      > diff --git a/migration/cpu-throttle.c b/migration/cpu-
>     throttle.c
>      >      > index 5179019e33..674dc2004e 100644
>      >      > --- a/migration/cpu-throttle.c
>      >      > +++ b/migration/cpu-throttle.c
>      >      > @@ -141,7 +141,7 @@ void
>     cpu_throttle_dirty_sync_timer_tick(void
>      >     *opaque)
>      >      >        * effect on guest performance, therefore omit it to
>     avoid
>      >      >        * paying extra for the sync penalty.
>      >      >        */
>      >      > -    if (sync_cnt <= 1) {
>      >      > +    if (!sync_cnt) {
>      >      >           goto end;
>      >      >       }
>      >      >
>      >      > diff --git a/migration/ram.c b/migration/ram.c
>      >      > index 05ff9eb328..571dba10b7 100644
>      >      > --- a/migration/ram.c
>      >      > +++ b/migration/ram.c
>      >      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>      >      >   {
>      >      >       MigrationState *ms = migrate_get_current();
>      >      >       RAMBlock *block;
>      >      > -    unsigned long pages;
>      >      > +    unsigned long pages, clear_bmap_pages;
>      >      >       uint8_t shift;
>      >      >
>      >      >       /* Skip setting bitmap if there is no RAM */
>      >      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>      >      >
>      >      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>      >      >               pages = block->max_length >> TARGET_PAGE_BITS;
>      >      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
>      >      >               /*
>      >      >                * The initial dirty bitmap for migration
>     must be
>      >     set with all
>      >      >                * ones to make sure we'll migrate every
>     guest RAM
>      >     page to
>      >      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>      >      >                   block->file_bmap = bitmap_new(pages);
>      >      >               }
>      >      >               block->clear_bmap_shift = shift;
>      >      > -            block->clear_bmap =
>      >     bitmap_new(clear_bmap_size(pages, shift));
>      >      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
>      >      > +            /*
>      >      > +             * Set clear_bmap to 1 unconditionally, as we
>     always
>      >     set bmap
>      >      > +             * to all 1s by default.
>      >      > +             */
>      >      > +            bitmap_set(block->clear_bmap, 0,
>     clear_bmap_pages);
>      >      >           }
>      >      >       }
>      >      >   }
>      >      > @@ -2783,7 +2789,6 @@ static bool
>     ram_init_bitmaps(RAMState *rs,
>      >     Error **errp)
>      >      >               if (!ret) {
>      >      >                   goto out_unlock;
>      >      >               }
>      >      > -            migration_bitmap_sync_precopy(false);
>      >      >           }
>      >      >       }
>      >      >   out_unlock:
>      >
>      >
>      >     For virtio-mem, we rely on the
>     migration_bitmap_clear_discarded_pages()
>      >     call to clear all bits that correspond to unplugged memory
>     ranges.
>      >
>      >
>      >     If we ommit the sync, we can likely have bits of unplugged
>     ranges still
>      >     set to "1", meaning we would try migrate them later, although we
>      >     shouldn't?
>      >
>      >
>      >
>      > IIUC, migration_bitmap_clear_discarded_pagesis still called at
>     the end of
>      > ram_init_bitmaps no matter if we omit the first sync.
>       > > PRECOPY_NOTIFY_SETUPnotification is sent out at the end of
>      > ram_save_setup(ram_list_init_bitmaps),when
>      > virtio_balloon_free_page_start() is
>      > called,migration_bitmap_clear_discarded_pages() has already
>     completed
>      > and the
>      > bmap has been correctly cleared.
>      >
>      > ram_save_setup
>      >     -> ram_list_init_bitmaps
>      >         -> migration_bitmap_clear_discarded_pages
>      >      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>      >
>      > You can double check it.
> 
>     That's not my concern, let me clarify :)
> 
> 
>     Assume in KVM the bitmap is all 1s ("everything dirty").
> 
>     In current code, we will sync the bitmap once (IIRC, clearing any dirty
>     bits from KVM).
> 
> 
> For the old logic, write-protect and clear dirty bits are all done in
> the KVM_GET_DIRTY_LOG API, while with
> KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 feature enabled, clearing
> dirty bits are postponed in the KVM_CLEAR_DIRTY_LOG API, which
> is called right before page sending in the migration thread in QEMU.
> 
> 
>     Then we call migration_bitmap_clear_discarded_pages() to clear all
>     "discarded" pages that we shouldn't touch.
> 
>     When we do the next bitmap sync, we will not get a "1" for discarded
>     ranges, and we will never try migrating discarded ranges.
> 
> 
>     With your patch, we're omitting the first sync. Could we possibly get
>     discarded ranges reported from KVM as dirty during the "now first" sync
>     *after* the migration_bitmap_clear_discarded_pages() call, and try
>     migrating discarded ranges?
> 
>     I did not dive deep into the code, maybe
>     migration_bitmap_clear_discarded_pages() ends up clearing the bits in
> 
> 
> Yes, the migration_bitmap_clear_discarded_pages clear the bits in
> KVM in:
> ramblock_dirty_bitmap_clear_discarded_pages
>      -> dirty_bitmap_clear_section
>          -> migration_clear_memory_region_dirty_bitmap_range
>              -> migration_clear_memory_region_dirty_bitmap
>                  -> memory_region_clear_dirty_bitmap
>                      -> KVM_CLEAR_DIRTY_LOG ioctl
> 

I just tried, and your patch breaks virtio-mem migration as I suspected.

sudo build/qemu-system-x86_64 \
     --enable-kvm \
     -m 16G,maxmem=24G \
     -object memory-backend-ram,id=mem1,size=16G \
     -machine q35,memory-backend=mem1 \
     -cpu max \
     -smp 16 \
     -nographic \
     -nodefaults \
     -net nic -net user \
     -chardev stdio,nosignal,id=serial \
     -hda Fedora-Server-KVM-40-1.14.x86_64.qcow2 \
     -cdrom /home/dhildenb/git/cloud-init/cloud-init.iso \
     -device isa-serial,chardev=serial \
     -chardev socket,id=monitor,path=/var/tmp/mon_src,server,nowait \
     -mon chardev=monitor,mode=readline \
     -device pcie-root-port,id=root,slot=0 \
     -object memory-backend-file,share=on,mem-path=/dev/shm/vm,id=mem2,size=8G \
     -device virtio-mem-pci,id=vmem0,memdev=mem2,requested-size=16M,bus=root,dynamic-memslots=on,prealloc=on \


Once the VM booted up, as expected we're consuming 16M


$ stat /dev/shm/vm
  Datei: /dev/shm/vm
  Größe: 8589934592      Blöcke: 32768      EA Block: 4096   reguläre Datei
Gerät: 0/25     Inode: 2087        Verknüpfungen: 1
tmpfs                   tmpfs             16G   16M   16G   1% /dev/shm


Let's start a migration:

$ echo "migrate exec:cat>state" | sudo nc -U /var/tmp/mon_src


... and we end up reading discarded memory:

$ LANG=C df -ahT  | grep /dev/shm
tmpfs                   tmpfs             16G  8.0G  7.6G  52% /dev/shm



Running with TCG only also doesn't work. So somewhere, we have a bitmap filled with
all 1s that is not cleared if we drop the first sync.

-- 
Cheers,

David / dhildenb


Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 1 week, 3 days ago
On Tue, Nov 12, 2024 at 11:08:44AM +0100, David Hildenbrand wrote:
> On 11.11.24 12:37, Yong Huang wrote:
> > 
> > 
> > On Mon, Nov 11, 2024 at 6:42 PM David Hildenbrand <david@redhat.com
> > <mailto:david@redhat.com>> wrote:
> > 
> >     On 11.11.24 11:08, Yong Huang wrote:
> >      >
> >      >
> >      > On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand
> >     <david@redhat.com <mailto:david@redhat.com>
> >      > <mailto:david@redhat.com <mailto:david@redhat.com>>> wrote:
> >      >
> >      >     On 09.11.24 05:59, Hyman Huang wrote:
> >      >      > The first iteration's RAMBlock dirty sync can be omitted
> >     because QEMU
> >      >      > always initializes the RAMBlock's bmap to all 1s by default.
> >      >      >
> >      >      > Signed-off-by: Hyman Huang <yong.huang@smartx.com
> >     <mailto:yong.huang@smartx.com>
> >      >     <mailto:yong.huang@smartx.com <mailto:yong.huang@smartx.com>>>
> >      >      > ---
> >      >      >   migration/cpu-throttle.c |  2 +-
> >      >      >   migration/ram.c          | 11 ++++++++---
> >      >      >   2 files changed, 9 insertions(+), 4 deletions(-)
> >      >      >
> >      >      > diff --git a/migration/cpu-throttle.c b/migration/cpu-
> >     throttle.c
> >      >      > index 5179019e33..674dc2004e 100644
> >      >      > --- a/migration/cpu-throttle.c
> >      >      > +++ b/migration/cpu-throttle.c
> >      >      > @@ -141,7 +141,7 @@ void
> >     cpu_throttle_dirty_sync_timer_tick(void
> >      >     *opaque)
> >      >      >        * effect on guest performance, therefore omit it to
> >     avoid
> >      >      >        * paying extra for the sync penalty.
> >      >      >        */
> >      >      > -    if (sync_cnt <= 1) {
> >      >      > +    if (!sync_cnt) {
> >      >      >           goto end;
> >      >      >       }
> >      >      >
> >      >      > diff --git a/migration/ram.c b/migration/ram.c
> >      >      > index 05ff9eb328..571dba10b7 100644
> >      >      > --- a/migration/ram.c
> >      >      > +++ b/migration/ram.c
> >      >      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >      >      >   {
> >      >      >       MigrationState *ms = migrate_get_current();
> >      >      >       RAMBlock *block;
> >      >      > -    unsigned long pages;
> >      >      > +    unsigned long pages, clear_bmap_pages;
> >      >      >       uint8_t shift;
> >      >      >
> >      >      >       /* Skip setting bitmap if there is no RAM */
> >      >      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >      >      >
> >      >      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >      >      >               pages = block->max_length >> TARGET_PAGE_BITS;
> >      >      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >      >      >               /*
> >      >      >                * The initial dirty bitmap for migration
> >     must be
> >      >     set with all
> >      >      >                * ones to make sure we'll migrate every
> >     guest RAM
> >      >     page to
> >      >      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >      >      >                   block->file_bmap = bitmap_new(pages);
> >      >      >               }
> >      >      >               block->clear_bmap_shift = shift;
> >      >      > -            block->clear_bmap =
> >      >     bitmap_new(clear_bmap_size(pages, shift));
> >      >      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> >      >      > +            /*
> >      >      > +             * Set clear_bmap to 1 unconditionally, as we
> >     always
> >      >     set bmap
> >      >      > +             * to all 1s by default.
> >      >      > +             */
> >      >      > +            bitmap_set(block->clear_bmap, 0,
> >     clear_bmap_pages);
> >      >      >           }
> >      >      >       }
> >      >      >   }
> >      >      > @@ -2783,7 +2789,6 @@ static bool
> >     ram_init_bitmaps(RAMState *rs,
> >      >     Error **errp)
> >      >      >               if (!ret) {
> >      >      >                   goto out_unlock;
> >      >      >               }
> >      >      > -            migration_bitmap_sync_precopy(false);
> >      >      >           }
> >      >      >       }
> >      >      >   out_unlock:
> >      >
> >      >
> >      >     For virtio-mem, we rely on the
> >     migration_bitmap_clear_discarded_pages()
> >      >     call to clear all bits that correspond to unplugged memory
> >     ranges.
> >      >
> >      >
> >      >     If we ommit the sync, we can likely have bits of unplugged
> >     ranges still
> >      >     set to "1", meaning we would try migrate them later, although we
> >      >     shouldn't?
> >      >
> >      >
> >      >
> >      > IIUC, migration_bitmap_clear_discarded_pagesis still called at
> >     the end of
> >      > ram_init_bitmaps no matter if we omit the first sync.
> >       > > PRECOPY_NOTIFY_SETUPnotification is sent out at the end of
> >      > ram_save_setup(ram_list_init_bitmaps),when
> >      > virtio_balloon_free_page_start() is
> >      > called,migration_bitmap_clear_discarded_pages() has already
> >     completed
> >      > and the
> >      > bmap has been correctly cleared.
> >      >
> >      > ram_save_setup
> >      >     -> ram_list_init_bitmaps
> >      >         -> migration_bitmap_clear_discarded_pages
> >      >      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
> >      >
> >      > You can double check it.
> > 
> >     That's not my concern, let me clarify :)
> > 
> > 
> >     Assume in KVM the bitmap is all 1s ("everything dirty").
> > 
> >     In current code, we will sync the bitmap once (IIRC, clearing any dirty
> >     bits from KVM).
> > 
> > 
> > For the old logic, write-protect and clear dirty bits are all done in
> > the KVM_GET_DIRTY_LOG API, while with
> > KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 feature enabled, clearing
> > dirty bits are postponed in the KVM_CLEAR_DIRTY_LOG API, which
> > is called right before page sending in the migration thread in QEMU.
> > 
> > 
> >     Then we call migration_bitmap_clear_discarded_pages() to clear all
> >     "discarded" pages that we shouldn't touch.
> > 
> >     When we do the next bitmap sync, we will not get a "1" for discarded
> >     ranges, and we will never try migrating discarded ranges.
> > 
> > 
> >     With your patch, we're omitting the first sync. Could we possibly get
> >     discarded ranges reported from KVM as dirty during the "now first" sync
> >     *after* the migration_bitmap_clear_discarded_pages() call, and try
> >     migrating discarded ranges?
> > 
> >     I did not dive deep into the code, maybe
> >     migration_bitmap_clear_discarded_pages() ends up clearing the bits in
> > 
> > 
> > Yes, the migration_bitmap_clear_discarded_pages clear the bits in
> > KVM in:
> > ramblock_dirty_bitmap_clear_discarded_pages
> >      -> dirty_bitmap_clear_section
> >          -> migration_clear_memory_region_dirty_bitmap_range
> >              -> migration_clear_memory_region_dirty_bitmap
> >                  -> memory_region_clear_dirty_bitmap
> >                      -> KVM_CLEAR_DIRTY_LOG ioctl
> > 
> 
> I just tried, and your patch breaks virtio-mem migration as I suspected.
> 
> sudo build/qemu-system-x86_64 \
>     --enable-kvm \
>     -m 16G,maxmem=24G \
>     -object memory-backend-ram,id=mem1,size=16G \
>     -machine q35,memory-backend=mem1 \
>     -cpu max \
>     -smp 16 \
>     -nographic \
>     -nodefaults \
>     -net nic -net user \
>     -chardev stdio,nosignal,id=serial \
>     -hda Fedora-Server-KVM-40-1.14.x86_64.qcow2 \
>     -cdrom /home/dhildenb/git/cloud-init/cloud-init.iso \
>     -device isa-serial,chardev=serial \
>     -chardev socket,id=monitor,path=/var/tmp/mon_src,server,nowait \
>     -mon chardev=monitor,mode=readline \
>     -device pcie-root-port,id=root,slot=0 \
>     -object memory-backend-file,share=on,mem-path=/dev/shm/vm,id=mem2,size=8G \
>     -device virtio-mem-pci,id=vmem0,memdev=mem2,requested-size=16M,bus=root,dynamic-memslots=on,prealloc=on \
> 
> 
> Once the VM booted up, as expected we're consuming 16M
> 
> 
> $ stat /dev/shm/vm
>  Datei: /dev/shm/vm
>  Größe: 8589934592      Blöcke: 32768      EA Block: 4096   reguläre Datei
> Gerät: 0/25     Inode: 2087        Verknüpfungen: 1
> tmpfs                   tmpfs             16G   16M   16G   1% /dev/shm
> 
> 
> Let's start a migration:
> 
> $ echo "migrate exec:cat>state" | sudo nc -U /var/tmp/mon_src
> 
> 
> ... and we end up reading discarded memory:
> 
> $ LANG=C df -ahT  | grep /dev/shm
> tmpfs                   tmpfs             16G  8.0G  7.6G  52% /dev/shm
> 
> 
> 
> Running with TCG only also doesn't work. So somewhere, we have a bitmap filled with
> all 1s that is not cleared if we drop the first sync.

Hmm, I'm not yet sure why this happened, but indeed this reminds me that at
least vhost can have a similar issue: when vhost devices are used, vhost
keeps its own bitmap, so it can still have 1s in the unplugged regions when
they are reported the first time.

Is virtio-mem plug/unplug allowed during migration?  I'm wondering whether
the following could happen while migration is in progress:

  migration starts..
  bitmap init, discard all unplugged mem in dirty bmap
  plug mem region X, dirty some pages
  unplug mem region X
  dirty sync, reports mem region X dirty (even though unplugged..)
  ...

So if unplugged mem should never be touched by QEMU, I'm not sure whether
the above can trigger this case too.

With/without the above, I wonder if migration_bitmap_clear_discarded_pages()
shouldn't rely on the initial sync of the dirty bitmap; instead it could be
done after each global sync: either via another log_global_after_sync() hook,
or by just moving it into migration_bitmap_sync().
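
To make the ordering concrete, here is a tiny self-contained toy model of
that idea (not QEMU code; all names and sizes are made up): re-apply the
"discarded" mask after every sync instead of relying on the initial sync
alone:

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGES 16

    static bool kvm_dirty[PAGES];       /* stand-in for KVM's per-slot bitmap */
    static bool migration_bmap[PAGES];  /* stand-in for rb->bmap */
    static bool discarded[PAGES];       /* unplugged (virtio-mem) pages */

    /* Model of a global sync: OR KVM's bits into the migration bitmap,
     * then immediately drop everything that is discarded, so discarded
     * pages never survive a sync, initial or not. */
    static void bitmap_sync(void)
    {
        for (int i = 0; i < PAGES; i++) {
            if (kvm_dirty[i]) {
                migration_bmap[i] = true;
                kvm_dirty[i] = false;
            }
            if (discarded[i]) {
                migration_bmap[i] = false;
            }
        }
    }

    int main(void)
    {
        for (int i = 0; i < PAGES; i++) {
            migration_bmap[i] = true;   /* bmap starts as all 1s */
            kvm_dirty[i] = true;        /* assume KVM also reports all 1s */
        }
        discarded[3] = true;            /* page 3 is unplugged */

        bitmap_sync();                  /* safe even for the very first sync */
        printf("migrate page 3? %d\n", migration_bmap[3]);  /* prints 0 */
        return 0;
    }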

Thanks,

-- 
Peter Xu


Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 3 days ago
On 13.11.24 18:40, Peter Xu wrote:
> On Tue, Nov 12, 2024 at 11:08:44AM +0100, David Hildenbrand wrote:
>> On 11.11.24 12:37, Yong Huang wrote:
>>>
>>>
>>> On Mon, Nov 11, 2024 at 6:42 PM David Hildenbrand <david@redhat.com
>>> <mailto:david@redhat.com>> wrote:
>>>
>>>      On 11.11.24 11:08, Yong Huang wrote:
>>>       >
>>>       >
>>>       > On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand
>>>      <david@redhat.com <mailto:david@redhat.com>
>>>       > <mailto:david@redhat.com <mailto:david@redhat.com>>> wrote:
>>>       >
>>>       >     On 09.11.24 05:59, Hyman Huang wrote:
>>>       >      > The first iteration's RAMBlock dirty sync can be omitted
>>>      because QEMU
>>>       >      > always initializes the RAMBlock's bmap to all 1s by default.
>>>       >      >
>>>       >      > Signed-off-by: Hyman Huang <yong.huang@smartx.com
>>>      <mailto:yong.huang@smartx.com>
>>>       >     <mailto:yong.huang@smartx.com <mailto:yong.huang@smartx.com>>>
>>>       >      > ---
>>>       >      >   migration/cpu-throttle.c |  2 +-
>>>       >      >   migration/ram.c          | 11 ++++++++---
>>>       >      >   2 files changed, 9 insertions(+), 4 deletions(-)
>>>       >      >
>>>       >      > diff --git a/migration/cpu-throttle.c b/migration/cpu-
>>>      throttle.c
>>>       >      > index 5179019e33..674dc2004e 100644
>>>       >      > --- a/migration/cpu-throttle.c
>>>       >      > +++ b/migration/cpu-throttle.c
>>>       >      > @@ -141,7 +141,7 @@ void
>>>      cpu_throttle_dirty_sync_timer_tick(void
>>>       >     *opaque)
>>>       >      >        * effect on guest performance, therefore omit it to
>>>      avoid
>>>       >      >        * paying extra for the sync penalty.
>>>       >      >        */
>>>       >      > -    if (sync_cnt <= 1) {
>>>       >      > +    if (!sync_cnt) {
>>>       >      >           goto end;
>>>       >      >       }
>>>       >      >
>>>       >      > diff --git a/migration/ram.c b/migration/ram.c
>>>       >      > index 05ff9eb328..571dba10b7 100644
>>>       >      > --- a/migration/ram.c
>>>       >      > +++ b/migration/ram.c
>>>       >      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
>>>       >      >   {
>>>       >      >       MigrationState *ms = migrate_get_current();
>>>       >      >       RAMBlock *block;
>>>       >      > -    unsigned long pages;
>>>       >      > +    unsigned long pages, clear_bmap_pages;
>>>       >      >       uint8_t shift;
>>>       >      >
>>>       >      >       /* Skip setting bitmap if there is no RAM */
>>>       >      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
>>>       >      >
>>>       >      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>>>       >      >               pages = block->max_length >> TARGET_PAGE_BITS;
>>>       >      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
>>>       >      >               /*
>>>       >      >                * The initial dirty bitmap for migration
>>>      must be
>>>       >     set with all
>>>       >      >                * ones to make sure we'll migrate every
>>>      guest RAM
>>>       >     page to
>>>       >      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>>>       >      >                   block->file_bmap = bitmap_new(pages);
>>>       >      >               }
>>>       >      >               block->clear_bmap_shift = shift;
>>>       >      > -            block->clear_bmap =
>>>       >     bitmap_new(clear_bmap_size(pages, shift));
>>>       >      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
>>>       >      > +            /*
>>>       >      > +             * Set clear_bmap to 1 unconditionally, as we
>>>      always
>>>       >     set bmap
>>>       >      > +             * to all 1s by default.
>>>       >      > +             */
>>>       >      > +            bitmap_set(block->clear_bmap, 0,
>>>      clear_bmap_pages);
>>>       >      >           }
>>>       >      >       }
>>>       >      >   }
>>>       >      > @@ -2783,7 +2789,6 @@ static bool
>>>      ram_init_bitmaps(RAMState *rs,
>>>       >     Error **errp)
>>>       >      >               if (!ret) {
>>>       >      >                   goto out_unlock;
>>>       >      >               }
>>>       >      > -            migration_bitmap_sync_precopy(false);
>>>       >      >           }
>>>       >      >       }
>>>       >      >   out_unlock:
>>>       >
>>>       >
>>>       >     For virtio-mem, we rely on the
>>>      migration_bitmap_clear_discarded_pages()
>>>       >     call to clear all bits that correspond to unplugged memory
>>>      ranges.
>>>       >
>>>       >
>>>       >     If we ommit the sync, we can likely have bits of unplugged
>>>      ranges still
>>>       >     set to "1", meaning we would try migrate them later, although we
>>>       >     shouldn't?
>>>       >
>>>       >
>>>       >
>>>       > IIUC, migration_bitmap_clear_discarded_pagesis still called at
>>>      the end of
>>>       > ram_init_bitmaps no matter if we omit the first sync.
>>>        > > PRECOPY_NOTIFY_SETUPnotification is sent out at the end of
>>>       > ram_save_setup(ram_list_init_bitmaps),when
>>>       > virtio_balloon_free_page_start() is
>>>       > called,migration_bitmap_clear_discarded_pages() has already
>>>      completed
>>>       > and the
>>>       > bmap has been correctly cleared.
>>>       >
>>>       > ram_save_setup
>>>       >     -> ram_list_init_bitmaps
>>>       >         -> migration_bitmap_clear_discarded_pages
>>>       >      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
>>>       >
>>>       > You can double check it.
>>>
>>>      That's not my concern, let me clarify :)
>>>
>>>
>>>      Assume in KVM the bitmap is all 1s ("everything dirty").
>>>
>>>      In current code, we will sync the bitmap once (IIRC, clearing any dirty
>>>      bits from KVM).
>>>
>>>
>>> For the old logic, write-protect and clear dirty bits are all done in
>>> the KVM_GET_DIRTY_LOG API, while with
>>> KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 feature enabled, clearing
>>> dirty bits are postponed in the KVM_CLEAR_DIRTY_LOG API, which
>>> is called right before page sending in the migration thread in QEMU.
>>>
>>>
>>>      Then we call migration_bitmap_clear_discarded_pages() to clear all
>>>      "discarded" pages that we shouldn't touch.
>>>
>>>      When we do the next bitmap sync, we will not get a "1" for discarded
>>>      ranges, and we will never try migrating discarded ranges.
>>>
>>>
>>>      With your patch, we're omitting the first sync. Could we possibly get
>>>      discarded ranges reported from KVM as dirty during the "now first" sync
>>>      *after* the migration_bitmap_clear_discarded_pages() call, and try
>>>      migrating discarded ranges?
>>>
>>>      I did not dive deep into the code, maybe
>>>      migration_bitmap_clear_discarded_pages() ends up clearing the bits in
>>>
>>>
>>> Yes, the migration_bitmap_clear_discarded_pages clear the bits in
>>> KVM in:
>>> ramblock_dirty_bitmap_clear_discarded_pages
>>>       -> dirty_bitmap_clear_section
>>>           -> migration_clear_memory_region_dirty_bitmap_range
>>>               -> migration_clear_memory_region_dirty_bitmap
>>>                   -> memory_region_clear_dirty_bitmap
>>>                       -> KVM_CLEAR_DIRTY_LOG ioctl
>>>
>>
>> I just tried, and your patch breaks virtio-mem migration as I suspected.
>>
>> sudo build/qemu-system-x86_64 \
>>      --enable-kvm \
>>      -m 16G,maxmem=24G \
>>      -object memory-backend-ram,id=mem1,size=16G \
>>      -machine q35,memory-backend=mem1 \
>>      -cpu max \
>>      -smp 16 \
>>      -nographic \
>>      -nodefaults \
>>      -net nic -net user \
>>      -chardev stdio,nosignal,id=serial \
>>      -hda Fedora-Server-KVM-40-1.14.x86_64.qcow2 \
>>      -cdrom /home/dhildenb/git/cloud-init/cloud-init.iso \
>>      -device isa-serial,chardev=serial \
>>      -chardev socket,id=monitor,path=/var/tmp/mon_src,server,nowait \
>>      -mon chardev=monitor,mode=readline \
>>      -device pcie-root-port,id=root,slot=0 \
>>      -object memory-backend-file,share=on,mem-path=/dev/shm/vm,id=mem2,size=8G \
>>      -device virtio-mem-pci,id=vmem0,memdev=mem2,requested-size=16M,bus=root,dynamic-memslots=on,prealloc=on \
>>
>>
>> Once the VM booted up, as expected we're consuming 16M
>>
>>
>> $ stat /dev/shm/vm
>>   Datei: /dev/shm/vm
>>   Größe: 8589934592      Blöcke: 32768      EA Block: 4096   reguläre Datei
>> Gerät: 0/25     Inode: 2087        Verknüpfungen: 1
>> tmpfs                   tmpfs             16G   16M   16G   1% /dev/shm
>>
>>
>> Let's start a migration:
>>
>> $ echo "migrate exec:cat>state" | sudo nc -U /var/tmp/mon_src
>>
>>
>> ... and we end up reading discarded memory:
>>
>> $ LANG=C df -ahT  | grep /dev/shm
>> tmpfs                   tmpfs             16G  8.0G  7.6G  52% /dev/shm
>>
>>
>>
>> Running with TCG only also doesn't work. So somewhere, we have a bitmap filled with
>> all 1s that is not cleared if we drop the first sync.
> 
> Hmm, I'm not yet sure why this happened, but indeed this reminds me that at
> least vhost can have similar issue: when vhost devices are used, it has its
> own bitmap so there it can keep having 1s in the unplugged regions when
> reported the 1st time.

I'm also surprised that it triggers even with TCG. Somewhere there seems
to be a bitmap with all 1s hiding :)

> 
> Is virtio-mem plug/unplug allowed during migration?  I'm wondering whether
> below could happen while migration in progress:
> 
>    migration starts..
>    bitmap init, disgard all unplugged mem in dirty bmap
>    plug mem region X, dirty some pages
>    unplug mem region X
>    dirty sync, reports mem region X dirty (even though unplugged..)
>    ...

No, for this (and other) reasons virtio_mem_is_busy() checks for 
"migration_in_incoming_postcopy() || migration_is_running();" and 
rejects any memory plug/unplug requests.

So the discarded state is stable while migration is running.

> 
> So if unplugged mem should never be touched by qemu, then not sure whether
> above can trigger this case too.
> 
> With/without above, I wonder if migration_bitmap_clear_discarded_pages()
> shouldn't rely on the initial sync of dirty bitmap, instead it could be
> done after each global sync: either another log_global_after_sync() hook,
> or just move it over in migration_bitmap_sync().

I think I had precisely that, and I recall you suggested to have it only 
after the initial sync. Would work for me, but I'd still like to 
understand why essentially none of the "discard" was effective -- all of 
guest RAM got touched.

-- 
Cheers,

David / dhildenb


Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 1 week, 2 days ago
On Wed, Nov 13, 2024 at 07:49:44PM +0100, David Hildenbrand wrote:
> I think I had precisely that, and I recall you suggested to have it only
> after the initial sync. Would work for me, but I'd still like to understand
> why essentially none of the "discard" was effective -- all of guest RAM got
> touched.

Yes it'll be interesting to know..

One thing I'm wildly guessing is dirty_memory_extend(): maybe after the
ramblock is created, nobody has yet cleared the "1"s there for each of the
clients, including DIRTY_MEMORY_MIGRATION.  Then it'll be synced into the
ramblock bmap only in the initial sync, once per qemu lifecycle.

-- 
Peter Xu
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 2 days ago
On 13.11.24 21:12, Peter Xu wrote:
> On Wed, Nov 13, 2024 at 07:49:44PM +0100, David Hildenbrand wrote:
>> I think I had precisely that, and I recall you suggested to have it only
>> after the initial sync. Would work for me, but I'd still like to understand
>> why essentially none of the "discard" was effective -- all of guest RAM got
>> touched.
> 
> Yes it'll be interesting to know..
> 
> One thing I'm wildly guessing is dirty_memory_extend(), so maybe after the
> ramblock is created nobody yet to clear the "1"s there for each of the
> client, including DIRTY_MEMORY_MIGRATION.  Then it'll be synced to ramblock
> bmap only in the initial sync, once for each qemu lifecycle.


In ram_block_add() we do the

cpu_physical_memory_set_dirty_range(new_block->offset,
				    new_block->used_length,
				    DIRTY_CLIENTS_ALL);

ramblock_dirty_bitmap_clear_discarded_pages()->...->migration_clear_memory_region_dirty_bitmap_range()->migration_clear_memory_region_dirty_bitmap() 
won't end up clearing the bits in the dirty bitmap.

First I thought because of:

if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
     return;
}

But then I realized that even memory_region_clear_dirty_bitmap() will 
not clear the ramblock_dirty_bitmap_ bits! It's only concerned about 
listener->log_clear() calls.

Looking for DIRTY_MEMORY_BLOCK_SIZE users, only 
cpu_physical_memory_sync_dirty_bitmap() and 
cpu_physical_memory_clear_dirty_range() clear them, whereby the latter 
is only used when resizing RAMblocks.

At first I wondered whether 
ramblock_dirty_bitmap_clear_discarded_pages() should also call 
cpu_physical_memory_clear_dirty_range(), but then I am not so sure if 
that is really the right approach.


virtio-balloon calls qemu_guest_free_page_hint(), which calls

migration_clear_memory_region_dirty_bitmap_range()
bitmap_clear()

So it would maybe have the same issue.

-- 
Cheers,

David / dhildenb
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 1 week, 1 day ago
On Thu, Nov 14, 2024 at 10:02:37AM +0100, David Hildenbrand wrote:
> On 13.11.24 21:12, Peter Xu wrote:
> > On Wed, Nov 13, 2024 at 07:49:44PM +0100, David Hildenbrand wrote:
> > > I think I had precisely that, and I recall you suggested to have it only
> > > after the initial sync. Would work for me, but I'd still like to understand
> > > why essentially none of the "discard" was effective -- all of guest RAM got
> > > touched.
> > 
> > Yes it'll be interesting to know..
> > 
> > One thing I'm wildly guessing is dirty_memory_extend(), so maybe after the
> > ramblock is created nobody yet to clear the "1"s there for each of the
> > client, including DIRTY_MEMORY_MIGRATION.  Then it'll be synced to ramblock
> > bmap only in the initial sync, once for each qemu lifecycle.
> 
> 
> In ram_block_add() we do the
> 
> cpu_physical_memory_set_dirty_range(new_block->offset,
> 				    new_block->used_length,
> 				    DIRTY_CLIENTS_ALL);
> 
> ramblock_dirty_bitmap_clear_discarded_pages()->...->migration_clear_memory_region_dirty_bitmap_range()->migration_clear_memory_region_dirty_bitmap()
> won't end up clearing the bits in the dirty bitmap.
> 
> First I thought because of:
> 
> if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
>     return;
> }
> 
> But then I realized that even memory_region_clear_dirty_bitmap() will not
> clear the ramblock_dirty_bitmap_ bits! It's only concerned about
> listener->log_clear() calls.
> 
> Looking for DIRTY_MEMORY_BLOCK_SIZE users, only
> cpu_physical_memory_sync_dirty_bitmap() and
> cpu_physical_memory_clear_dirty_range() clear them, whereby the latter is
> only used when resizing RAMblocks.
> 
> At first I wondered whether ramblock_dirty_bitmap_clear_discarded_pages()
> should also call cpu_physical_memory_clear_dirty_range(), but then I am not
> so sure if that is really the right approach.

That sounds actually reasonable to me so far.. What's the concern in your
mind?

> 
> 
> virtio-balloon() calls qemu_guest_free_page_hint() which calls
> 
> migration_clear_memory_region_dirty_bitmap_range()
> bitmap_clear()
> 
> So it would maybe have the same issue.

Should virtio-balloon do the same?

So I suppose the idea here is some module may want to say "we should ignore
these pages in the dirty bitmap", and so far that's only about migration.

Then cpu_physical_memory_clear_dirty_range() does look like the right thing
to do, in which case the bmap in ram_list used to be overlooked.. it seems.

But of course, cpu_physical_memory_clear_dirty_range() still doesn't cover
the migration bitmap itself, which is ramblock->bmap.  So we'd need to
switch from migration_clear_memory_region_dirty_bitmap() to something like
cpu_physical_memory_clear_dirty_range(), just to also cover the ram_list
bitmaps, while keeping the rb->bmap operations like before.
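
Roughly, the per-range discard handling could then look something like the
sketch below (names taken from this thread and the tree, but the helper
itself is hypothetical and not compiled):

    static void clear_discarded_range(RAMBlock *rb, unsigned long start_page,
                                      unsigned long npages)
    {
        /* Clear the listener-side state (e.g. KVM via KVM_CLEAR_DIRTY_LOG),
         * as the current path already does. */
        migration_clear_memory_region_dirty_bitmap_range(rb, start_page, npages);

        /* Additionally clear the ram_list dirty bitmaps, which the current
         * path leaves untouched. */
        cpu_physical_memory_clear_dirty_range(
            rb->offset + ((ram_addr_t)start_page << TARGET_PAGE_BITS),
            (ram_addr_t)npages << TARGET_PAGE_BITS);

        /* Keep clearing the migration bitmap itself, as before. */
        bitmap_clear(rb->bmap, start_page, npages);
    }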

-- 
Peter Xu
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 1 day ago
On 14.11.24 20:28, Peter Xu wrote:
> On Thu, Nov 14, 2024 at 10:02:37AM +0100, David Hildenbrand wrote:
>> On 13.11.24 21:12, Peter Xu wrote:
>>> On Wed, Nov 13, 2024 at 07:49:44PM +0100, David Hildenbrand wrote:
>>>> I think I had precisely that, and I recall you suggested to have it only
>>>> after the initial sync. Would work for me, but I'd still like to understand
>>>> why essentially none of the "discard" was effective -- all of guest RAM got
>>>> touched.
>>>
>>> Yes it'll be interesting to know..
>>>
>>> One thing I'm wildly guessing is dirty_memory_extend(), so maybe after the
>>> ramblock is created nobody yet to clear the "1"s there for each of the
>>> client, including DIRTY_MEMORY_MIGRATION.  Then it'll be synced to ramblock
>>> bmap only in the initial sync, once for each qemu lifecycle.
>>
>>
>> In ram_block_add() we do the
>>
>> cpu_physical_memory_set_dirty_range(new_block->offset,
>> 				    new_block->used_length,
>> 				    DIRTY_CLIENTS_ALL);
>>
>> ramblock_dirty_bitmap_clear_discarded_pages()->...->migration_clear_memory_region_dirty_bitmap_range()->migration_clear_memory_region_dirty_bitmap()
>> won't end up clearing the bits in the dirty bitmap.
>>
>> First I thought because of:
>>
>> if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
>>      return;
>> }
>>
>> But then I realized that even memory_region_clear_dirty_bitmap() will not
>> clear the ramblock_dirty_bitmap_ bits! It's only concerned about
>> listener->log_clear() calls.
>>
>> Looking for DIRTY_MEMORY_BLOCK_SIZE users, only
>> cpu_physical_memory_sync_dirty_bitmap() and
>> cpu_physical_memory_clear_dirty_range() clear them, whereby the latter is
>> only used when resizing RAMblocks.
>>
>> At first I wondered whether ramblock_dirty_bitmap_clear_discarded_pages()
>> should also call cpu_physical_memory_clear_dirty_range(), but then I am not
>> so sure if that is really the right approach.
> 
> That sounds actually reasonable to me so far.. What's the concern in your
> mind?

I think what I had in mind was that for the initial bitmap sync, when we 
set the bmap to all-1s already, we could just clear the whole 
ramblock_dirty_bitmap_ + KVM ... bitmaps.

So, instead of an "initial sync" we might just want to do an "initial 
clearing" of all bitmaps.

> 
>>
>>
>> virtio-balloon() calls qemu_guest_free_page_hint() which calls
>>
>> migration_clear_memory_region_dirty_bitmap_range()
>> bitmap_clear()
>>
>> So it would maybe have the same issue.
> 
> Should virtio-balloon do the same?

virtio-balloon is more interesting, because I assume here we could run 
after the "initial clearing" and would want to mark it clean everywhere.

> 
> So I suppose the idea here is some module may want to say "we should ignore
> these pages in the dirty bitmap", and so far that's only about migration.
> 
> Then cpu_physical_memory_clear_dirty_range() does look like the right thing
> to do, in which case the bmap in ram_list used to be overlooked.. it seems.
> 
> But of course, cpu_physical_memory_clear_dirty_range() still doesn't cover
> the migration bitmap itself, which is ramblock->bmap.  So we'll need to
> switch from migration_clear_memory_region_dirty_bitmap() to use things like
> cpu_physical_memory_clear_dirty_range(), just to cover ram_list bitmaps.
> Then keeping the rb->bmap operations like before..

For virtio-balloon likely yes. Regarding virtio-mem, maybe "initial 
clearing" + only modifying the rb->bmap when processing discards could 
work and would even be more efficient.

(but I'm confused because we have way too many bitmaps, and maybe the 
KVM one could be problematic without an initial sync ... we'd want an 
initial clearing for that as well ...)

-- 
Cheers,

David / dhildenb
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 1 week, 1 day ago
On Thu, Nov 14, 2024 at 10:16:41PM +0100, David Hildenbrand wrote:
> On 14.11.24 20:28, Peter Xu wrote:
> > On Thu, Nov 14, 2024 at 10:02:37AM +0100, David Hildenbrand wrote:
> > > On 13.11.24 21:12, Peter Xu wrote:
> > > > On Wed, Nov 13, 2024 at 07:49:44PM +0100, David Hildenbrand wrote:
> > > > > I think I had precisely that, and I recall you suggested to have it only
> > > > > after the initial sync. Would work for me, but I'd still like to understand
> > > > > why essentially none of the "discard" was effective -- all of guest RAM got
> > > > > touched.
> > > > 
> > > > Yes it'll be interesting to know..
> > > > 
> > > > One thing I'm wildly guessing is dirty_memory_extend(), so maybe after the
> > > > ramblock is created nobody yet to clear the "1"s there for each of the
> > > > client, including DIRTY_MEMORY_MIGRATION.  Then it'll be synced to ramblock
> > > > bmap only in the initial sync, once for each qemu lifecycle.
> > > 
> > > 
> > > In ram_block_add() we do the
> > > 
> > > cpu_physical_memory_set_dirty_range(new_block->offset,
> > > 				    new_block->used_length,
> > > 				    DIRTY_CLIENTS_ALL);
> > > 
> > > ramblock_dirty_bitmap_clear_discarded_pages()->...->migration_clear_memory_region_dirty_bitmap_range()->migration_clear_memory_region_dirty_bitmap()
> > > won't end up clearing the bits in the dirty bitmap.
> > > 
> > > First I thought because of:
> > > 
> > > if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
> > >      return;
> > > }
> > > 
> > > But then I realized that even memory_region_clear_dirty_bitmap() will not
> > > clear the ramblock_dirty_bitmap_ bits! It's only concerned about
> > > listener->log_clear() calls.
> > > 
> > > Looking for DIRTY_MEMORY_BLOCK_SIZE users, only
> > > cpu_physical_memory_sync_dirty_bitmap() and
> > > cpu_physical_memory_clear_dirty_range() clear them, whereby the latter is
> > > only used when resizing RAMblocks.
> > > 
> > > At first I wondered whether ramblock_dirty_bitmap_clear_discarded_pages()
> > > should also call cpu_physical_memory_clear_dirty_range(), but then I am not
> > > so sure if that is really the right approach.
> > 
> > That sounds actually reasonable to me so far.. What's the concern in your
> > mind?
> 
> I think what I had in mind was that for the initial bitmap sync, when we set
> the bmap to all-1s already, we could just clear the whole
> ramblock_dirty_bitmap_ + KVM ... bitmaps.
> 
> So, instead of an "initial sync" we might just want to do an "initial
> clearing" of all bitmaps.

Logically most dirty tracking bitmaps should start with all zeros.  Old KVM
kernels are like that; KVM_DIRTY_LOG_INITIALLY_SET is not, but that's a
separate feature.  I'd still expect it to be pretty common for the rest,
e.g. vhost should have all zeros in its initial bitmap even without an
initial sync.

> 
> > 
> > > 
> > > 
> > > virtio-balloon() calls qemu_guest_free_page_hint() which calls
> > > 
> > > migration_clear_memory_region_dirty_bitmap_range()
> > > bitmap_clear()
> > > 
> > > So it would maybe have the same issue.
> > 
> > Should virtio-balloon do the same?
> 
> virtio-balloon is more interesting, because I assume here we could run after
> the "initial clearing" and would want to mark it clean everywhere.

Yes, if it does what I mentioned below, IIUC it'll clear all dirty bits
across the whole stack.  Only the ram_list bitmap is missing.  IIUC that
could mean it stops working for at least tcg, as tcg solely uses it; even
with kvm some MRs may use it.  Maybe we want to fix that separately.

> 
> > 
> > So I suppose the idea here is some module may want to say "we should ignore
> > these pages in the dirty bitmap", and so far that's only about migration.
> > 
> > Then cpu_physical_memory_clear_dirty_range() does look like the right thing
> > to do, in which case the bmap in ram_list used to be overlooked.. it seems.
> > 
> > But of course, cpu_physical_memory_clear_dirty_range() still doesn't cover
> > the migration bitmap itself, which is ramblock->bmap.  So we'll need to
> > switch from migration_clear_memory_region_dirty_bitmap() to use things like
> > cpu_physical_memory_clear_dirty_range(), just to cover ram_list bitmaps.
> > Then keeping the rb->bmap operations like before..
> 
> For virtio-balloon likely yes. Regarding virtio-mem, maybe "initial
> clearing" + only modifying the rb->bmap when processing discards could work
> and would even be more efficient.
> 
> (but I'm confused because we have way too many bitmaps, and maybe the KVM
> one could be problematic without an initial sync ... we'd want an initial
> clearing for that as well ...)

So IMHO most of the bitmaps should be initialized with zeros, not
ones.. like I mentioned above.

The migration bitmap is special, because it's not about dirty tracking
capability / reporting, but about knowing that we need to migrate everything
in the first round.  So setting all ones makes sense for migration only, not
for a reporting facility.  KVM_DIRTY_LOG_INITIALLY_SET exists for its own
reason of speeding up migration starts.

So, now I am wondering whether we should not set all ones in the ram_list
bitmap at all, which corresponds to this change:

===8<===
diff --git a/system/physmem.c b/system/physmem.c
index dc1db3a384..10966fa68c 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1913,10 +1913,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     ram_list.version++;
     qemu_mutex_unlock_ramlist();
 
-    cpu_physical_memory_set_dirty_range(new_block->offset,
-                                        new_block->used_length,
-                                        DIRTY_CLIENTS_ALL);
-
     if (new_block->host) {
         qemu_ram_setup_dump(new_block->host, new_block->max_length);
         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
===8<===

I'm wondering whether the above could fix the virtio-mem regression after
Hyman's current patch is applied.

That said, IMHO virtio-mem should still use the same helper as
virtio-balloon, as I discussed previously, so as to reset the bitmap for
the whole stack (which always seems to be the right thing to do, so as not
to miss one layer of them)?

Hence: 1 patch to virtio-balloon covering the ram_list bitmap (which could
be a real fix for virtio-balloon on e.g. tcg?); 1 patch to virtio-mem
reusing that virtio-balloon helper, just as a cleanup to also cover all
bitmaps; 1 patch like the above to avoid setting all ones in the ram_list
bitmap, as a cleanup; then finally remove the sync() in SETUP, which is this
patch.  IIUC once all these changes are applied it'll work for all cases.

Thanks,

-- 
Peter Xu
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 1 day ago
>>>>
>>>> But then I realized that even memory_region_clear_dirty_bitmap() will not
>>>> clear the ramblock_dirty_bitmap_ bits! It's only concerned about
>>>> listener->log_clear() calls.
>>>>
>>>> Looking for DIRTY_MEMORY_BLOCK_SIZE users, only
>>>> cpu_physical_memory_sync_dirty_bitmap() and
>>>> cpu_physical_memory_clear_dirty_range() clear them, whereby the latter is
>>>> only used when resizing RAMblocks.
>>>>
>>>> At first I wondered whether ramblock_dirty_bitmap_clear_discarded_pages()
>>>> should also call cpu_physical_memory_clear_dirty_range(), but then I am not
>>>> so sure if that is really the right approach.
>>>
>>> That sounds actually reasonable to me so far.. What's the concern in your
>>> mind?
>>
>> I think what I had in mind was that for the initial bitmap sync, when we set
>> the bmap to all-1s already, we could just clear the whole
>> ramblock_dirty_bitmap_ + KVM ... bitmaps.
>>
>> So, instead of an "initial sync" we might just want to do an "initial
>> clearing" of all bitmaps.
> 
> Logically most dirty tracking bitmaps should start with all zeros.  KVM old
> kernels are like that; KVM_DIRTY_LOG_INITIALLY_SET is not, but it's a
> separate feature.  I still hope it's pretty common for the rest, e.g. vhost
> should have all zeros in its init bitmap even without initial sync.

We'd better double-check and document that, because it must be guaranteed,
not "let's cross our fingers".

Also, I'm not 100% sure how KVM internals implement that (I recall some 
s390x oddities, but might be wrong ...).

> 
>>
>>>
>>>>
>>>>
>>>> virtio-balloon() calls qemu_guest_free_page_hint() which calls
>>>>
>>>> migration_clear_memory_region_dirty_bitmap_range()
>>>> bitmap_clear()
>>>>
>>>> So it would maybe have the same issue.
>>>
>>> Should virtio-balloon do the same?
>>
>> virtio-balloon is more interesting, because I assume here we could run after
>> the "initial clearing" and would want to mark it clean everywhere.
> 
> Yes, if it does what I mentioned below, IIUC it'll clear all dirty bits
> across the whole stack.  Only the ram_list bitmap is missing.  IIUC it
> could mean it could stop working for at least tcg, as tcg sololy uses
> it.. even with kvm some MRs may use it.  Maybe we want to fix it
> separately.

Yes, that definitely needs care.

[...]

>> (but I'm confused because we have way too many bitmaps, and maybe the KVM
>> one could be problematic without an initial sync ... we'd want an initial
>> clearing for that as well ...)
> 
> So IMHO most of the bitmaps should be initialized with zeros, not
> ones.. like I mentioned above.
> 
> Migration bitmap is special, because it's not about dirty tracking
> capability / reporting but that we know we need to migrate the first round.
> So setting all ones makes sense for migration only, not a reporting
> facility.  While KVM_DIRTY_LOG_INITIALLY_SET existed for its own reasoning
> on speeding up migration starts..
> 
> So, now I am thinking whether we should not set all ones in ram_list bitmap
> at all, corresponds to this change:
> 
> ===8<===
> diff --git a/system/physmem.c b/system/physmem.c
> index dc1db3a384..10966fa68c 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -1913,10 +1913,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>       ram_list.version++;
>       qemu_mutex_unlock_ramlist();
>   
> -    cpu_physical_memory_set_dirty_range(new_block->offset,
> -                                        new_block->used_length,
> -                                        DIRTY_CLIENTS_ALL);
> -
>       if (new_block->host) {
>           qemu_ram_setup_dump(new_block->host, new_block->max_length);
>           qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
> ===8<===
> 

It will leave things in an inconsistent state with regard to
qemu_ram_resize() though. So we'd want to do the same thing there as well.

I don't know about DIRTY_CLIENTS_ALL, though ... I'm no expert on that or
on which side effects it has.

Because saying "initially, all memory is dirty" can make sense, 
depending on from which angle you look at it.

> I'm guessing whether above could fix the virtio-mem regression after
> Hyman's current patch applied.
> 
> Said that, IMHO virtio-mem should still use the same helper just like
> virtio-balloon as I discussed previously, so as to reset bitmap for the
> whole stack (which seems to always be the right thing to do to not miss one
> layer of them)?

Well, the difference is that virtio-mem really only gets called exactly 
once initially. If we can just reset all bitmaps initially, then 
virtio-mem really only has to zap the rb->bmap.

The most robust solution would be to process discards after every bitmap 
sync of course.
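
As a rough sketch of that idea (reusing the existing
migration_bitmap_clear_discarded_pages() helper; where exactly it would be
invoked inside migration_bitmap_sync() is an assumption):

===8<===
/*
 * Sketch only: re-punch discarded ranges at the end of every bitmap sync,
 * not just once at setup, so that even a sync implementation that reports
 * all memory as dirty cannot resurrect ranges virtio-mem has discarded.
 */
static void migration_bitmap_sync_reapply_discards(void)
{
    migration_bitmap_clear_discarded_pages(migrate_get_current());
}
===8<===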

> 
> Hence: 1 patch to virtio-balloon covering ram_list bitmap (which could be a
> real fix to virtio-balloon on e.g. tcg?); 1 patch to virtio-mem reusing
> that helper of virtio-balloon just as a cleanup to also cover all bitmaps;
> 1 patch like above to avoid setting ones at all in ram_list bitmap as
> cleanup; then finally remove the sync() in SETUP, which is this patch.
> IIUC after all these changes applied it'll work for all cases.

Would work for me.

BTW, I was wondering if we could get rid of one of the bitmaps with KVM 
... but likely not that easy.

-- 
Cheers,

David / dhildenb
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Peter Xu 1 week, 1 day ago
On Fri, Nov 15, 2024 at 10:11:51AM +0100, David Hildenbrand wrote:
> > > > > 
> > > > > But then I realized that even memory_region_clear_dirty_bitmap() will not
> > > > > clear the ramblock_dirty_bitmap_ bits! It's only concerned about
> > > > > listener->log_clear() calls.
> > > > > 
> > > > > Looking for DIRTY_MEMORY_BLOCK_SIZE users, only
> > > > > cpu_physical_memory_sync_dirty_bitmap() and
> > > > > cpu_physical_memory_clear_dirty_range() clear them, whereby the latter is
> > > > > only used when resizing RAMblocks.
> > > > > 
> > > > > At first I wondered whether ramblock_dirty_bitmap_clear_discarded_pages()
> > > > > should also call cpu_physical_memory_clear_dirty_range(), but then I am not
> > > > > so sure if that is really the right approach.
> > > > 
> > > > That sounds actually reasonable to me so far.. What's the concern in your
> > > > mind?
> > > 
> > > I think what I had in mind was that for the initial bitmap sync, when we set
> > > the bmap to all-1s already, we could just clear the whole
> > > ramblock_dirty_bitmap_ + KVM ... bitmaps.
> > > 
> > > So, instead of an "initial sync" we might just want to do an "initial
> > > clearing" of all bitmaps.
> > 
> > Logically most dirty tracking bitmaps should start with all zeros.  KVM old
> > kernels are like that; KVM_DIRTY_LOG_INITIALLY_SET is not, but it's a
> > separate feature.  I still hope it's pretty common for the rest, e.g. vhost
> > should have all zeros in its init bitmap even without initial sync.
> 
> We better double check and document that, because it must be guaranteed, not
> "let's cross fingers".

Yes, we should double-check at least the known good use cases, maybe not
all.

E.g., I see nvmm_log_sync() and whpx_log_sync() unconditionally mark all
memory as dirty.  I actually don't know how some of these trackers work
for live migration, or if they work at all.

If by accident this change breaks something, I also wonder whether we
should fix the patch that breaks it, or fix the assumption that the 1st
sync must happen at setup.  That assumption is simply wrong to me, and not
all dirty tracking users have it either.  E.g. vga tracking won't even have
a SETUP phase at all.

The simplest solution for any potential breakage is to provide a
log_global_start() and sync once there; that's exactly what SETUP should
do.  But I hope it won't happen at all.
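
For illustration, a sketch of how an accelerator that genuinely needs a
one-off sync could do it from its MemoryListener instead (the
log_global_start callback exists, but its exact signature has changed
across QEMU versions, and foo_sync_all_dirty_bitmaps() is purely
hypothetical):

===8<===
/* Sketch only: do the accelerator's one-off sync when dirty logging is
 * globally enabled, instead of relying on migration's setup-time sync. */
static bool foo_log_global_start(MemoryListener *listener, Error **errp)
{
    foo_sync_all_dirty_bitmaps();   /* hypothetical accelerator-specific sync */
    return true;
}

static MemoryListener foo_memory_listener = {
    .name = "foo",
    .log_global_start = foo_log_global_start,
    /* .log_sync, .region_add, ... as before */
};
===8<===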

This is legacy tech debt to me.  The earlier we understand it the better,
so I'm personally OK if someone tries to do this, as long as the major
archs remain safe.

> 
> Also, I'm not 100% sure how KVM internals implement that (I recall some
> s390x oddities, but might be wrong ...).
> 
> > 
> > > 
> > > > 
> > > > > 
> > > > > 
> > > > > virtio-balloon() calls qemu_guest_free_page_hint() which calls
> > > > > 
> > > > > migration_clear_memory_region_dirty_bitmap_range()
> > > > > bitmap_clear()
> > > > > 
> > > > > So it would maybe have the same issue.
> > > > 
> > > > Should virtio-balloon do the same?
> > > 
> > > virtio-balloon is more interesting, because I assume here we could run after
> > > the "initial clearing" and would want to mark it clean everywhere.
> > 
> > Yes, if it does what I mentioned below, IIUC it'll clear all dirty bits
> > across the whole stack.  Only the ram_list bitmap is missing.  IIUC it
> > could mean it could stop working for at least tcg, as tcg sololy uses
> > it.. even with kvm some MRs may use it.  Maybe we want to fix it
> > separately.
> 
> Yes, that definitely needs care.
> 
> [...]
> 
> > > (but I'm confused because we have way too many bitmaps, and maybe the KVM
> > > one could be problematic without an initial sync ... we'd want an initial
> > > clearing for that as well ...)
> > 
> > So IMHO most of the bitmaps should be initialized with zeros, not
> > ones.. like I mentioned above.
> > 
> > Migration bitmap is special, because it's not about dirty tracking
> > capability / reporting but that we know we need to migrate the first round.
> > So setting all ones makes sense for migration only, not a reporting
> > facility.  While KVM_DIRTY_LOG_INITIALLY_SET existed for its own reasoning
> > on speeding up migration starts..
> > 
> > So, now I am thinking whether we should not set all ones in ram_list bitmap
> > at all, corresponds to this change:
> > 
> > ===8<===
> > diff --git a/system/physmem.c b/system/physmem.c
> > index dc1db3a384..10966fa68c 100644
> > --- a/system/physmem.c
> > +++ b/system/physmem.c
> > @@ -1913,10 +1913,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
> >       ram_list.version++;
> >       qemu_mutex_unlock_ramlist();
> > -    cpu_physical_memory_set_dirty_range(new_block->offset,
> > -                                        new_block->used_length,
> > -                                        DIRTY_CLIENTS_ALL);
> > -
> >       if (new_block->host) {
> >           qemu_ram_setup_dump(new_block->host, new_block->max_length);
> >           qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
> > ===8<===
> > 
> 
> It will leave things in an inconsistent state with regards of
> qemu_ram_resize() though. So we'd want to do the same thing there as well.
> 
> I don't know about DIRTY_CLIENTS_ALL, though ... no expert on that, which
> side effects it has.
> 
> Because saying "initially, all memory is dirty" can make sense, depending on
> from which angle you look at it.

Migration definitely doesn't need it, so to be safe we could also change
CLIENT_ALL to CODE|VGA there.
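
Concretely, that would mean something like the following in ram_block_add()
(sketch only; DIRTY_MEMORY_VGA / DIRTY_MEMORY_CODE are the existing client
bits, and whether VGA/CODE really still need the initial marking is the
open question here):

===8<===
/* Sketch only: keep the initial "everything dirty" marking for the VGA and
 * CODE clients, but no longer for DIRTY_MEMORY_MIGRATION, since migration
 * initializes its own bitmap to all 1s anyway. */
cpu_physical_memory_set_dirty_range(new_block->offset,
                                    new_block->used_length,
                                    (1 << DIRTY_MEMORY_VGA) |
                                    (1 << DIRTY_MEMORY_CODE));
===8<===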

And if we agree virtio-mem will need to punch holes in all the bitmaps,
then this patch isn't even needed.  More on that below.

> 
> > I'm guessing whether above could fix the virtio-mem regression after
> > Hyman's current patch applied.
> > 
> > Said that, IMHO virtio-mem should still use the same helper just like
> > virtio-balloon as I discussed previously, so as to reset bitmap for the
> > whole stack (which seems to always be the right thing to do to not miss one
> > layer of them)?
> 
> Well, the difference is that virtio-mem really only gets called exactly once
> initially. If we can just reset all bitmaps initially, then virtio-mem
> really only has to zap the rb->bmap.

I think virtio-mem had better always punch through, regardless of whether
we make the above change. Consider the KVM "initial-all-set" feature I
mentioned above: when that feature bit is set by QEMU, KVM initializes the
bitmap to all ones.  So punching through may be required for virtio-mem to
clear the 1s in KVM at least.

> 
> The most robust solution would be to process discards after every bitmap
> sync of course.
> 
> > 
> > Hence: 1 patch to virtio-balloon covering ram_list bitmap (which could be a
> > real fix to virtio-balloon on e.g. tcg?); 1 patch to virtio-mem reusing
> > that helper of virtio-balloon just as a cleanup to also cover all bitmaps;
> > 1 patch like above to avoid setting ones at all in ram_list bitmap as
> > cleanup; then finally remove the sync() in SETUP, which is this patch.
> > IIUC after all these changes applied it'll work for all cases.
> 
> Would work for me.
> 
> BTW, I was wondering if we could get rid of one of the bitmaps with KVM ...
> but likely not that easy.

Yeah.  The memslot bitmaps can be completely overwritten by the KVM kernel,
so at least we can't simply make use of the migration ram_list bitmap there.

-- 
Peter Xu
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by David Hildenbrand 1 week, 1 day ago
>> We better double check and document that, because it must be guaranteed, not
>> "let's cross fingers".
> 
> Yes, we should double check on at least known good use cases, maybe not
> all.
> 
> E.g., I see nvmm_log_sync() and whpx_log_sync() unconditionally set dirty
> to all mem always.  I actually don't know how some of these trackers work
> for live migration, or if it works at all.
> 
> If by accident this change breaks something, I also wonder whether we
> should fix the patch that breaks it, or fix the assumption that the 1st
> sync must happen at setup.  That's simply wrong to me.. and not all dirty
> track users have such either.  E.g. vga tracking won't even have a SETUP
> phase at all.
> 
> The simplest solution for any potential breakage is to provide a
> log_global_start() and sync once there, that's exactly what SETUP should
> do.  But I hope it won't happen at all..
> 
> This is legacy tech debt to me.  The earlier we understand it the better,
> so I'm personally ok if someone would try to do this as long as major archs
> will be safe.
> 
>>
>> Also, I'm not 100% sure how KVM internals implement that (I recall some
>> s390x oddities, but might be wrong ...).
>>
>>>
>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>> virtio-balloon() calls qemu_guest_free_page_hint() which calls
>>>>>>
>>>>>> migration_clear_memory_region_dirty_bitmap_range()
>>>>>> bitmap_clear()
>>>>>>
>>>>>> So it would maybe have the same issue.
>>>>>
>>>>> Should virtio-balloon do the same?
>>>>
>>>> virtio-balloon is more interesting, because I assume here we could run after
>>>> the "initial clearing" and would want to mark it clean everywhere.
>>>
>>> Yes, if it does what I mentioned below, IIUC it'll clear all dirty bits
>>> across the whole stack.  Only the ram_list bitmap is missing.  IIUC it
>>> could mean it could stop working for at least tcg, as tcg sololy uses
>>> it.. even with kvm some MRs may use it.  Maybe we want to fix it
>>> separately.
>>
>> Yes, that definitely needs care.
>>
>> [...]
>>
>>>> (but I'm confused because we have way too many bitmaps, and maybe the KVM
>>>> one could be problematic without an initial sync ... we'd want an initial
>>>> clearing for that as well ...)
>>>
>>> So IMHO most of the bitmaps should be initialized with zeros, not
>>> ones.. like I mentioned above.
>>>
>>> Migration bitmap is special, because it's not about dirty tracking
>>> capability / reporting but that we know we need to migrate the first round.
>>> So setting all ones makes sense for migration only, not a reporting
>>> facility.  While KVM_DIRTY_LOG_INITIALLY_SET existed for its own reasoning
>>> on speeding up migration starts..
>>>
>>> So, now I am thinking whether we should not set all ones in ram_list bitmap
>>> at all, corresponds to this change:
>>>
>>> ===8<===
>>> diff --git a/system/physmem.c b/system/physmem.c
>>> index dc1db3a384..10966fa68c 100644
>>> --- a/system/physmem.c
>>> +++ b/system/physmem.c
>>> @@ -1913,10 +1913,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>>>        ram_list.version++;
>>>        qemu_mutex_unlock_ramlist();
>>> -    cpu_physical_memory_set_dirty_range(new_block->offset,
>>> -                                        new_block->used_length,
>>> -                                        DIRTY_CLIENTS_ALL);
>>> -
>>>        if (new_block->host) {
>>>            qemu_ram_setup_dump(new_block->host, new_block->max_length);
>>>            qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
>>> ===8<===
>>>
>>
>> It will leave things in an inconsistent state with regards of
>> qemu_ram_resize() though. So we'd want to do the same thing there as well.
>>
>> I don't know about DIRTY_CLIENTS_ALL, though ... no expert on that, which
>> side effects it has.
>>
>> Because saying "initially, all memory is dirty" can make sense, depending on
>> from which angle you look at it.
> 
> Migration definitely doesn't need it, so to be safe we could also make
> CLIENT_ALL to CODE|VGA.
> 
> And if we agree virtio-mem will need to punch holes on all the bitmaps,
> then this patch is even not needed.  For that, more below.

Yes, it's fine for me as a fix. It's not optimal, but I don't really 
care as long as it works :)

> 
>>
>>> I'm guessing whether above could fix the virtio-mem regression after
>>> Hyman's current patch applied.
>>>
>>> Said that, IMHO virtio-mem should still use the same helper just like
>>> virtio-balloon as I discussed previously, so as to reset bitmap for the
>>> whole stack (which seems to always be the right thing to do to not miss one
>>> layer of them)?
>>
>> Well, the difference is that virtio-mem really only gets called exactly once
>> initially. If we can just reset all bitmaps initially, then virtio-mem
>> really only has to zap the rb->bmap.
> 
> I think virtio-mem should better always punch through, regardless of
> whether we can have above change. Consider cases like KVM "initial-all-set"
> feature I mentioned above, when that feature bit is set by QEMU, KVM sets
> all ones to the bitmap initially.  So that may be required for virtio-mem
> to clear 1s in KVM at least.

Yes.

Anyhow, I'm happy as long as we don't break virtio-mem. Clearing all 
bitmaps will work.

Getting virtio-mem to clear discarded ranges after every bitmap sync would
be even more robust, for example if we had some stupid sync()
implementation that simply always sets all memory dirty ...

-- 
Cheers,

David / dhildenb
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 1 week, 5 days ago
On Mon, Nov 11, 2024 at 6:42 PM David Hildenbrand <david@redhat.com> wrote:

> On 11.11.24 11:08, Yong Huang wrote:
> >
> >
> > On Mon, Nov 11, 2024 at 5:27 PM David Hildenbrand <david@redhat.com> wrote:
> >
> >     On 09.11.24 05:59, Hyman Huang wrote:
> >      > The first iteration's RAMBlock dirty sync can be omitted because
> QEMU
> >      > always initializes the RAMBlock's bmap to all 1s by default.
> >      >
> >      > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> >      > ---
> >      >   migration/cpu-throttle.c |  2 +-
> >      >   migration/ram.c          | 11 ++++++++---
> >      >   2 files changed, 9 insertions(+), 4 deletions(-)
> >      >
> >      > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c
> >      > index 5179019e33..674dc2004e 100644
> >      > --- a/migration/cpu-throttle.c
> >      > +++ b/migration/cpu-throttle.c
> >      > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> >     *opaque)
> >      >        * effect on guest performance, therefore omit it to avoid
> >      >        * paying extra for the sync penalty.
> >      >        */
> >      > -    if (sync_cnt <= 1) {
> >      > +    if (!sync_cnt) {
> >      >           goto end;
> >      >       }
> >      >
> >      > diff --git a/migration/ram.c b/migration/ram.c
> >      > index 05ff9eb328..571dba10b7 100644
> >      > --- a/migration/ram.c
> >      > +++ b/migration/ram.c
> >      > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)
> >      >   {
> >      >       MigrationState *ms = migrate_get_current();
> >      >       RAMBlock *block;
> >      > -    unsigned long pages;
> >      > +    unsigned long pages, clear_bmap_pages;
> >      >       uint8_t shift;
> >      >
> >      >       /* Skip setting bitmap if there is no RAM */
> >      > @@ -2736,6 +2736,7 @@ static void ram_list_init_bitmaps(void)
> >      >
> >      >           RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >      >               pages = block->max_length >> TARGET_PAGE_BITS;
> >      > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >      >               /*
> >      >                * The initial dirty bitmap for migration must be
> >     set with all
> >      >                * ones to make sure we'll migrate every guest RAM
> >     page to
> >      > @@ -2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >      >                   block->file_bmap = bitmap_new(pages);
> >      >               }
> >      >               block->clear_bmap_shift = shift;
> >      > -            block->clear_bmap =
> >     bitmap_new(clear_bmap_size(pages, shift));
> >      > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> >      > +            /*
> >      > +             * Set clear_bmap to 1 unconditionally, as we always
> >     set bmap
> >      > +             * to all 1s by default.
> >      > +             */
> >      > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >      >           }
> >      >       }
> >      >   }
> >      > @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs,
> >     Error **errp)
> >      >               if (!ret) {
> >      >                   goto out_unlock;
> >      >               }
> >      > -            migration_bitmap_sync_precopy(false);
> >      >           }
> >      >       }
> >      >   out_unlock:
> >
> >
> >     For virtio-mem, we rely on the migration_bitmap_clear_discarded_pages()
> >     call to clear all bits that correspond to unplugged memory ranges.
> >
> >     If we omit the sync, we can likely have bits of unplugged ranges still
> >     set to "1", meaning we would try to migrate them later, although we
> >     shouldn't?
> >
> >
> >
> > IIUC, migration_bitmap_clear_discarded_pages is still called at the end of
> > ram_init_bitmaps no matter if we omit the first sync.
> >
> > The PRECOPY_NOTIFY_SETUP notification is sent out at the end of
> > ram_save_setup (ram_list_init_bitmaps); when
> > virtio_balloon_free_page_start() is called,
> > migration_bitmap_clear_discarded_pages() has already completed and the
> > bmap has been correctly cleared.
> >
> > ram_save_setup
> >     -> ram_list_init_bitmaps
> >         -> migration_bitmap_clear_discarded_pages
> >      -> return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
> >
> > You can double check it.
>
> That's not my concern, let me clarify :)
>
>
> Assume in KVM the bitmap is all 1s ("everything dirty").
>
> In current code, we will sync the bitmap once (IIRC, clearing any dirty
> bits from KVM).
>
> Then we call migration_bitmap_clear_discarded_pages() to clear all
> "discarded" pages that we shouldn't touch.
>
> When we do the next bitmap sync, we will not get a "1" for discarded
> ranges, and we will never try migrating discarded ranges.
>
>
> With your patch, we're omitting the first sync. Could we possibly get
> discarded ranges reported from KVM as dirty during the "now first" sync
> *after* the migration_bitmap_clear_discarded_pages() call, and try
> migrating discarded ranges?
>
> I did not dive deep into the code, maybe
> migration_bitmap_clear_discarded_pages() ends up clearing the bits in
> KVM, but I recall that there was something special about the first
> bitmap sync.
>
> --
> Cheers,
>
> David / dhildenb
>
>

-- 
Best regards
RE: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Wang, Wei W 1 week, 5 days ago
On Saturday, November 9, 2024 1:00 PM, Hyman Huang wrote:
> The first iteration's RAMBlock dirty sync can be omitted because QEMU
> always initializes the RAMBlock's bmap to all 1s by default.
> 
> Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> ---
>  migration/cpu-throttle.c |  2 +-
>  migration/ram.c          | 11 ++++++++---
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c index
> 5179019e33..674dc2004e 100644
> --- a/migration/cpu-throttle.c
> +++ b/migration/cpu-throttle.c
> @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> *opaque)
>       * effect on guest performance, therefore omit it to avoid
>       * paying extra for the sync penalty.
>       */
> -    if (sync_cnt <= 1) {
> +    if (!sync_cnt) {
>          goto end;
>      }
> 
> diff --git a/migration/ram.c b/migration/ram.c index
> 05ff9eb328..571dba10b7 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)  {
>      MigrationState *ms = migrate_get_current();
>      RAMBlock *block;
> -    unsigned long pages;
> +    unsigned long pages, clear_bmap_pages;
>      uint8_t shift;
> 
>      /* Skip setting bitmap if there is no RAM */ @@ -2736,6 +2736,7 @@
> static void ram_list_init_bitmaps(void)
> 
>          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>              pages = block->max_length >> TARGET_PAGE_BITS;
> +            clear_bmap_pages = clear_bmap_size(pages, shift);
>              /*
>               * The initial dirty bitmap for migration must be set with all
>               * ones to make sure we'll migrate every guest RAM page to @@ -
> 2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
>                  block->file_bmap = bitmap_new(pages);
>              }
>              block->clear_bmap_shift = shift;
> -            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
> +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> +            /*
> +             * Set clear_bmap to 1 unconditionally, as we always set bmap
> +             * to all 1s by default.
> +             */
> +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
>          }
>      }
>  }
> @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> **errp)
>              if (!ret) {
>                  goto out_unlock;
>              }
> -            migration_bitmap_sync_precopy(false);

Would this affect the statistics collected in migration_bitmap_sync_precopy(),
e.g. rs->migration_dirty_pages?
Re: [PATCH v1 2/2] migration: Do not perform RAMBlock dirty sync during the first iteration
Posted by Yong Huang 1 week, 5 days ago
On Mon, Nov 11, 2024 at 5:07 PM Wang, Wei W <wei.w.wang@intel.com> wrote:

> On Saturday, November 9, 2024 1:00 PM, Hyman Huang wrote:
> > The first iteration's RAMBlock dirty sync can be omitted because QEMU
> > always initializes the RAMBlock's bmap to all 1s by default.
> >
> > Signed-off-by: Hyman Huang <yong.huang@smartx.com>
> > ---
> >  migration/cpu-throttle.c |  2 +-
> >  migration/ram.c          | 11 ++++++++---
> >  2 files changed, 9 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/cpu-throttle.c b/migration/cpu-throttle.c index
> > 5179019e33..674dc2004e 100644
> > --- a/migration/cpu-throttle.c
> > +++ b/migration/cpu-throttle.c
> > @@ -141,7 +141,7 @@ void cpu_throttle_dirty_sync_timer_tick(void
> > *opaque)
> >       * effect on guest performance, therefore omit it to avoid
> >       * paying extra for the sync penalty.
> >       */
> > -    if (sync_cnt <= 1) {
> > +    if (!sync_cnt) {
> >          goto end;
> >      }
> >
> > diff --git a/migration/ram.c b/migration/ram.c index
> > 05ff9eb328..571dba10b7 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2718,7 +2718,7 @@ static void ram_list_init_bitmaps(void)  {
> >      MigrationState *ms = migrate_get_current();
> >      RAMBlock *block;
> > -    unsigned long pages;
> > +    unsigned long pages, clear_bmap_pages;
> >      uint8_t shift;
> >
> >      /* Skip setting bitmap if there is no RAM */ @@ -2736,6 +2736,7 @@
> > static void ram_list_init_bitmaps(void)
> >
> >          RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >              pages = block->max_length >> TARGET_PAGE_BITS;
> > +            clear_bmap_pages = clear_bmap_size(pages, shift);
> >              /*
> >               * The initial dirty bitmap for migration must be set with
> all
> >               * ones to make sure we'll migrate every guest RAM page to
> @@ -
> > 2751,7 +2752,12 @@ static void ram_list_init_bitmaps(void)
> >                  block->file_bmap = bitmap_new(pages);
> >              }
> >              block->clear_bmap_shift = shift;
> > -            block->clear_bmap = bitmap_new(clear_bmap_size(pages,
> shift));
> > +            block->clear_bmap = bitmap_new(clear_bmap_pages);
> > +            /*
> > +             * Set clear_bmap to 1 unconditionally, as we always set
> bmap
> > +             * to all 1s by default.
> > +             */
> > +            bitmap_set(block->clear_bmap, 0, clear_bmap_pages);
> >          }
> >      }
> >  }
> > @@ -2783,7 +2789,6 @@ static bool ram_init_bitmaps(RAMState *rs, Error
> > **errp)
> >              if (!ret) {
> >                  goto out_unlock;
> >              }
> > -            migration_bitmap_sync_precopy(false);
>
> Would this affect the statistics collected in
> migration_bitmap_sync_precopy(),
> e.g. rs->migration_dirty_pages?
>

For the non-first dirty syncs, it does.

For the first dirty sync it does not, since migration_dirty_pages is
initialized in ram_state_init() from ram_bytes_total() rather than by
migration_bitmap_sync_precopy():

(*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;

So omitting the first sync does not affect the statistics; please double-check that.

Thanks for the comment,

Yong

-- 
Best regards