[PATCH v2 3/3] dump-guest-memory: Block live migration

Peter Xu posted 3 patches 4 years, 5 months ago
Maintainers: "Dr. David Alan Gilbert" <dgilbert@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Juan Quintela <quintela@redhat.com>
There is a newer version of this series
[PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Peter Xu 4 years, 5 months ago
Both dump-guest-memory and live migration caches vm state at the beginning.
Either of them entering the other one will cause race on the vm state, and even
more severe on that (please refer to the crash report in the bug link).

Let's block live migration in dump-guest-memory, and that'll also block
dump-guest-memory if it detected that we're during a live migration.

Side note: migrate_del_blocker() can be called even if the blocker is not
inserted yet, so it's safe to unconditionally delete that blocker in
dump_cleanup (g_slist_remove allows no-entry-found case).

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 dump/dump.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index ab625909f3..9c1c1fb738 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -29,6 +29,7 @@
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "hw/misc/vmcoreinfo.h"
+#include "migration/blocker.h"
 
 #ifdef TARGET_X86_64
 #include "win_dump.h"
@@ -47,6 +48,8 @@
 
 #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
 
+static Error *dump_migration_blocker;
+
 #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
     ((DIV_ROUND_UP((hdr_size), 4) +                     \
       DIV_ROUND_UP((name_size), 4) +                    \
@@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
             qemu_mutex_unlock_iothread();
         }
     }
+    migrate_del_blocker(dump_migration_blocker);
 
     return 0;
 }
@@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char *file,
     Error *local_err = NULL;
     bool detach_p = false;
 
-    if (runstate_check(RUN_STATE_INMIGRATE)) {
-        error_setg(errp, "Dump not allowed during incoming migration.");
-        return;
-    }
-
     /* if there is a dump in background, we should wait until the dump
      * finished */
     if (dump_in_progress()) {
@@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char *file,
         return;
     }
 
+    if (!dump_migration_blocker) {
+        error_setg(&dump_migration_blocker,
+                   "Live migration disabled: dump-guest-memory in progress");
+    }
+
+    /*
+     * Allows even for -only-migratable, but forbid migration during the
+     * process of dump guest memory.
+     */
+    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
+        /* Remember to release the fd before passing it over to dump state */
+        close(fd);
+        return;
+    }
+
     s = &dump_state_global;
     dump_state_prepare(s);
 
-- 
2.31.1


Re: [PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Marc-André Lureau 4 years, 5 months ago
Hi

On Thu, Aug 26, 2021 at 10:58 PM Peter Xu <peterx@redhat.com> wrote:

> Both dump-guest-memory and live migration caches vm state at the beginning.
> Either of them entering the other one will cause race on the vm state, and
> even
> more severe on that (please refer to the crash report in the bug link).
>
> Let's block live migration in dump-guest-memory, and that'll also block
> dump-guest-memory if it detected that we're during a live migration.
>
> Side note: migrate_del_blocker() can be called even if the blocker is not
> inserted yet, so it's safe to unconditionally delete that blocker in
> dump_cleanup (g_slist_remove allows no-entry-found case).
>
> Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  dump/dump.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index ab625909f3..9c1c1fb738 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -29,6 +29,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
>  #include "hw/misc/vmcoreinfo.h"
> +#include "migration/blocker.h"
>
>  #ifdef TARGET_X86_64
>  #include "win_dump.h"
> @@ -47,6 +48,8 @@
>
>  #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
>
> +static Error *dump_migration_blocker;
> +
>  #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
>      ((DIV_ROUND_UP((hdr_size), 4) +                     \
>        DIV_ROUND_UP((name_size), 4) +                    \
> @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
>              qemu_mutex_unlock_iothread();
>          }
>      }
> +    migrate_del_blocker(dump_migration_blocker);
>
>      return 0;
>  }
> @@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char
> *file,
>      Error *local_err = NULL;
>      bool detach_p = false;
>
> -    if (runstate_check(RUN_STATE_INMIGRATE)) {
> -        error_setg(errp, "Dump not allowed during incoming migration.");
> -        return;
> -    }
> -
>      /* if there is a dump in background, we should wait until the dump
>       * finished */
>      if (dump_in_progress()) {
> @@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char
> *file,
>          return;
>      }
>
> +    if (!dump_migration_blocker) {
> +        error_setg(&dump_migration_blocker,
> +                   "Live migration disabled: dump-guest-memory in
> progress");
> +    }
> +
> +    /*
> +     * Allows even for -only-migratable, but forbid migration during the
> +     * process of dump guest memory.
> +     */
> +    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
> +        /* Remember to release the fd before passing it over to dump
> state */
> +        close(fd);
> +        return;
> +    }
>

I would move it earlier.  Why not leave it at the beginning of the function
as it was?

Then dump_in_progress() is probably redundant too.

+
>      s = &dump_state_global;
>      dump_state_prepare(s);
>
> --
> 2.31.1
>
>
Re: [PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Marc-André Lureau 4 years, 5 months ago
Hi

On Fri, Aug 27, 2021 at 11:44 AM Marc-André Lureau <
marcandre.lureau@redhat.com> wrote:

> Hi
>
> On Thu, Aug 26, 2021 at 10:58 PM Peter Xu <peterx@redhat.com> wrote:
>
>> Both dump-guest-memory and live migration caches vm state at the
>> beginning.
>> Either of them entering the other one will cause race on the vm state,
>> and even
>> more severe on that (please refer to the crash report in the bug link).
>>
>> Let's block live migration in dump-guest-memory, and that'll also block
>> dump-guest-memory if it detected that we're during a live migration.
>>
>> Side note: migrate_del_blocker() can be called even if the blocker is not
>> inserted yet, so it's safe to unconditionally delete that blocker in
>> dump_cleanup (g_slist_remove allows no-entry-found case).
>>
>> Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
>> Signed-off-by: Peter Xu <peterx@redhat.com>
>> ---
>>  dump/dump.c | 24 +++++++++++++++++++-----
>>  1 file changed, 19 insertions(+), 5 deletions(-)
>>
>> diff --git a/dump/dump.c b/dump/dump.c
>> index ab625909f3..9c1c1fb738 100644
>> --- a/dump/dump.c
>> +++ b/dump/dump.c
>> @@ -29,6 +29,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qemu/main-loop.h"
>>  #include "hw/misc/vmcoreinfo.h"
>> +#include "migration/blocker.h"
>>
>>  #ifdef TARGET_X86_64
>>  #include "win_dump.h"
>> @@ -47,6 +48,8 @@
>>
>>  #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
>>
>> +static Error *dump_migration_blocker;
>> +
>>  #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
>>      ((DIV_ROUND_UP((hdr_size), 4) +                     \
>>        DIV_ROUND_UP((name_size), 4) +                    \
>> @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
>>              qemu_mutex_unlock_iothread();
>>          }
>>      }
>> +    migrate_del_blocker(dump_migration_blocker);
>>
>>      return 0;
>>  }
>> @@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char
>> *file,
>>      Error *local_err = NULL;
>>      bool detach_p = false;
>>
>> -    if (runstate_check(RUN_STATE_INMIGRATE)) {
>> -        error_setg(errp, "Dump not allowed during incoming migration.");
>> -        return;
>> -    }
>> -
>>      /* if there is a dump in background, we should wait until the dump
>>       * finished */
>>      if (dump_in_progress()) {
>> @@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char
>> *file,
>>          return;
>>      }
>>
>> +    if (!dump_migration_blocker) {
>> +        error_setg(&dump_migration_blocker,
>> +                   "Live migration disabled: dump-guest-memory in
>> progress");
>> +    }
>> +
>> +    /*
>> +     * Allows even for -only-migratable, but forbid migration during the
>> +     * process of dump guest memory.
>> +     */
>> +    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
>> +        /* Remember to release the fd before passing it over to dump
>> state */
>> +        close(fd);
>> +        return;
>> +    }
>>
>
> I would move it earlier.  Why not leave it at the beginning of the
> function as it was?
>
>
Ah I think it's because dump_cleanup() isn't called when returning earlier.
But relying on a cleanup done outside of this function is not obvious.
Either dump_cleanup() should be called from here, or the blocker code
should be moved to dump_init() imho.



> Then dump_in_progress() is probably redundant too.
>




> +
>>      s = &dump_state_global;
>>      dump_state_prepare(s);
>>
>> --
>> 2.31.1
>>
>>
Re: [PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Peter Xu 4 years, 5 months ago
On Fri, Aug 27, 2021 at 11:51:04AM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Fri, Aug 27, 2021 at 11:44 AM Marc-André Lureau <
> marcandre.lureau@redhat.com> wrote:
> 
> > Hi
> >
> > On Thu, Aug 26, 2021 at 10:58 PM Peter Xu <peterx@redhat.com> wrote:
> >
> >> Both dump-guest-memory and live migration caches vm state at the
> >> beginning.
> >> Either of them entering the other one will cause race on the vm state,
> >> and even
> >> more severe on that (please refer to the crash report in the bug link).
> >>
> >> Let's block live migration in dump-guest-memory, and that'll also block
> >> dump-guest-memory if it detected that we're during a live migration.
> >>
> >> Side note: migrate_del_blocker() can be called even if the blocker is not
> >> inserted yet, so it's safe to unconditionally delete that blocker in
> >> dump_cleanup (g_slist_remove allows no-entry-found case).
> >>
> >> Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
> >> Signed-off-by: Peter Xu <peterx@redhat.com>
> >> ---
> >>  dump/dump.c | 24 +++++++++++++++++++-----
> >>  1 file changed, 19 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/dump/dump.c b/dump/dump.c
> >> index ab625909f3..9c1c1fb738 100644
> >> --- a/dump/dump.c
> >> +++ b/dump/dump.c
> >> @@ -29,6 +29,7 @@
> >>  #include "qemu/error-report.h"
> >>  #include "qemu/main-loop.h"
> >>  #include "hw/misc/vmcoreinfo.h"
> >> +#include "migration/blocker.h"
> >>
> >>  #ifdef TARGET_X86_64
> >>  #include "win_dump.h"
> >> @@ -47,6 +48,8 @@
> >>
> >>  #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
> >>
> >> +static Error *dump_migration_blocker;
> >> +
> >>  #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
> >>      ((DIV_ROUND_UP((hdr_size), 4) +                     \
> >>        DIV_ROUND_UP((name_size), 4) +                    \
> >> @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
> >>              qemu_mutex_unlock_iothread();
> >>          }
> >>      }
> >> +    migrate_del_blocker(dump_migration_blocker);
> >>
> >>      return 0;
> >>  }
> >> @@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char
> >> *file,
> >>      Error *local_err = NULL;
> >>      bool detach_p = false;
> >>
> >> -    if (runstate_check(RUN_STATE_INMIGRATE)) {
> >> -        error_setg(errp, "Dump not allowed during incoming migration.");
> >> -        return;
> >> -    }
> >> -
> >>      /* if there is a dump in background, we should wait until the dump
> >>       * finished */
> >>      if (dump_in_progress()) {
> >> @@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char
> >> *file,
> >>          return;
> >>      }
> >>
> >> +    if (!dump_migration_blocker) {
> >> +        error_setg(&dump_migration_blocker,
> >> +                   "Live migration disabled: dump-guest-memory in
> >> progress");
> >> +    }
> >> +
> >> +    /*
> >> +     * Allows even for -only-migratable, but forbid migration during the
> >> +     * process of dump guest memory.
> >> +     */
> >> +    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
> >> +        /* Remember to release the fd before passing it over to dump
> >> state */
> >> +        close(fd);
> >> +        return;
> >> +    }
> >>
> >
> > I would move it earlier.  Why not leave it at the beginning of the
> > function as it was?
> >
> >
> Ah I think it's because dump_cleanup() isn't called when returning earlier.
> But relying on a cleanup done outside of this function is not obvious.
> Either dump_cleanup() should be called from here, or the blocker code
> should be moved to dump_init() imho.

Note that dump_cleanup() cleans s->fd, however fd has not delievered to s->fd
now.

I can move it into dump_init() but I need to put it after setting s->fd to make
dump_cleanup work.  That could look a bit weird (as checking migration blocker
shouldn't really be bound to initialize of dump states).

There's another way to move add blocker to the entry of dump_in_progress()
check, however then all the checks between dump_in_progress() and before
opening the fd will not be able to "return" directly but we'll need to del the
blocker if any of those check fails.

That's how I come up with the current patch, which looks the easiest change and
still looks clean.

Besides, I think dump_in_progress() can be removed too with patch 1 checks
SAVEVM state too, however that's a side effect of migration_is_idle() and
dump_in_progres is doing the check in different ways by looking up dump state,
so I kept it.

Thanks,

-- 
Peter Xu


Re: [PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Marc-André Lureau 4 years, 4 months ago
Hi

On Thu, Aug 26, 2021 at 11:01 PM Peter Xu <peterx@redhat.com> wrote:

> Both dump-guest-memory and live migration caches vm state at the beginning.
> Either of them entering the other one will cause race on the vm state, and
> even
> more severe on that (please refer to the crash report in the bug link).
>
> Let's block live migration in dump-guest-memory, and that'll also block
> dump-guest-memory if it detected that we're during a live migration.
>
> Side note: migrate_del_blocker() can be called even if the blocker is not
> inserted yet, so it's safe to unconditionally delete that blocker in
> dump_cleanup (g_slist_remove allows no-entry-found case).
>
> Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  dump/dump.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/dump/dump.c b/dump/dump.c
> index ab625909f3..9c1c1fb738 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -29,6 +29,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
>  #include "hw/misc/vmcoreinfo.h"
> +#include "migration/blocker.h"
>
>  #ifdef TARGET_X86_64
>  #include "win_dump.h"
> @@ -47,6 +48,8 @@
>
>  #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
>
> +static Error *dump_migration_blocker;
> +
>  #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
>      ((DIV_ROUND_UP((hdr_size), 4) +                     \
>        DIV_ROUND_UP((name_size), 4) +                    \
> @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
>              qemu_mutex_unlock_iothread();
>          }
>      }
> +    migrate_del_blocker(dump_migration_blocker);
>
>      return 0;
>  }
> @@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char
> *file,
>      Error *local_err = NULL;
>      bool detach_p = false;
>
> -    if (runstate_check(RUN_STATE_INMIGRATE)) {
>

This INMIGRATE check,

-        error_setg(errp, "Dump not allowed during incoming migration.");
> -        return;
> -    }
> -
>      /* if there is a dump in background, we should wait until the dump
>       * finished */
>      if (dump_in_progress()) {
> @@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char
> *file,
>          return;
>      }
>
> +    if (!dump_migration_blocker) {
> +        error_setg(&dump_migration_blocker,
> +                   "Live migration disabled: dump-guest-memory in
> progress");
> +    }
> +
> +    /*
> +     * Allows even for -only-migratable, but forbid migration during the
> +     * process of dump guest memory.
> +     */
> +    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
>

is now handled here with migration_is_idle() ?

I am not familiar enough with the run & migration states intricacies here

+        /* Remember to release the fd before passing it over to dump state
> */
> +        close(fd);
> +        return;
> +    }
> +
>      s = &dump_state_global;
>      dump_state_prepare(s);
>
> --
> 2.31.1
>
>
>

-- 
Marc-André Lureau
Re: [PATCH v2 3/3] dump-guest-memory: Block live migration
Posted by Peter Xu 4 years, 4 months ago
On Wed, Sep 22, 2021 at 07:18:15PM +0400, Marc-André Lureau wrote:
> Hi

Hi, Marc-André,

> 
> On Thu, Aug 26, 2021 at 11:01 PM Peter Xu <peterx@redhat.com> wrote:
> 
> > Both dump-guest-memory and live migration caches vm state at the beginning.
> > Either of them entering the other one will cause race on the vm state, and
> > even
> > more severe on that (please refer to the crash report in the bug link).
> >
> > Let's block live migration in dump-guest-memory, and that'll also block
> > dump-guest-memory if it detected that we're during a live migration.
> >
> > Side note: migrate_del_blocker() can be called even if the blocker is not
> > inserted yet, so it's safe to unconditionally delete that blocker in
> > dump_cleanup (g_slist_remove allows no-entry-found case).
> >
> > Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  dump/dump.c | 24 +++++++++++++++++++-----
> >  1 file changed, 19 insertions(+), 5 deletions(-)
> >
> > diff --git a/dump/dump.c b/dump/dump.c
> > index ab625909f3..9c1c1fb738 100644
> > --- a/dump/dump.c
> > +++ b/dump/dump.c
> > @@ -29,6 +29,7 @@
> >  #include "qemu/error-report.h"
> >  #include "qemu/main-loop.h"
> >  #include "hw/misc/vmcoreinfo.h"
> > +#include "migration/blocker.h"
> >
> >  #ifdef TARGET_X86_64
> >  #include "win_dump.h"
> > @@ -47,6 +48,8 @@
> >
> >  #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */
> >
> > +static Error *dump_migration_blocker;
> > +
> >  #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
> >      ((DIV_ROUND_UP((hdr_size), 4) +                     \
> >        DIV_ROUND_UP((name_size), 4) +                    \
> > @@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s)
> >              qemu_mutex_unlock_iothread();
> >          }
> >      }
> > +    migrate_del_blocker(dump_migration_blocker);
> >
> >      return 0;
> >  }
> > @@ -1927,11 +1931,6 @@ void qmp_dump_guest_memory(bool paging, const char
> > *file,
> >      Error *local_err = NULL;
> >      bool detach_p = false;
> >
> > -    if (runstate_check(RUN_STATE_INMIGRATE)) {
> >
> 
> This INMIGRATE check,
> 
> -        error_setg(errp, "Dump not allowed during incoming migration.");
> > -        return;
> > -    }
> > -
> >      /* if there is a dump in background, we should wait until the dump
> >       * finished */
> >      if (dump_in_progress()) {
> > @@ -2005,6 +2004,21 @@ void qmp_dump_guest_memory(bool paging, const char
> > *file,
> >          return;
> >      }
> >
> > +    if (!dump_migration_blocker) {
> > +        error_setg(&dump_migration_blocker,
> > +                   "Live migration disabled: dump-guest-memory in
> > progress");
> > +    }
> > +
> > +    /*
> > +     * Allows even for -only-migratable, but forbid migration during the
> > +     * process of dump guest memory.
> > +     */
> > +    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
> >
> 
> is now handled here with migration_is_idle() ?
> 
> I am not familiar enough with the run & migration states intricacies here

Hmm, I thought it covers both src/dst, but after I double checked it seems
not..

On the destination side, we seem to have used MigrationState somewhere like in
migration_channel_process_incoming for reading parameters and capabilities, but
OTOH we used MigrationIncomingState for most of the rest operations, e.g., to
maintain dst migration state changes.  Then migration_is_idle() won't work on
dst indeed..

I think we should rename migration_is_idle() to migration_src_is_idle() at some
point.

For now, I'll respin and just keep the RUN_STATE_INMIGRATE check above.

Thanks!

-- 
Peter Xu