[v1] Migration: postcopy failure recovery

[Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Peter Xu 8 years, 6 months ago

Introduce this new command, to be sent when the source VM is ready to
resume the paused migration.  What the destination does here is
basically release the fault thread so it can continue servicing page faults.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/savevm.c     | 27 +++++++++++++++++++++++++++
 migration/savevm.h     |  1 +
 migration/trace-events |  1 +
 3 files changed, 29 insertions(+)

diff --git a/migration/savevm.c b/migration/savevm.c
index def9213..2e330bc 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -77,6 +77,7 @@ enum qemu_vm_cmd {
     MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
                                       were previously sent during
                                       precopy but are dirty. */
+    MIG_CMD_POSTCOPY_RESUME,       /* resume postcopy on dest */
     MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
     MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
     MIG_CMD_MAX
@@ -95,6 +96,7 @@ static struct mig_cmd_args {
     [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
     [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                    .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
+    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
     [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
     [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
     [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
@@ -931,6 +933,12 @@ void qemu_savevm_send_postcopy_run(QEMUFile *f)
     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
 }
 
+void qemu_savevm_send_postcopy_resume(QEMUFile *f)
+{
+    trace_savevm_send_postcopy_resume();
+    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
+}
+
 void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
 {
     size_t len;
@@ -1671,6 +1679,22 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
     return LOADVM_QUIT;
 }
 
+static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
+{
+    /*
+     * This means source VM is ready to resume the postcopy migration.
+     * It's time to switch state and release the fault thread to
+     * continue service page faults.
+     */
+    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    qemu_sem_post(&mis->postcopy_pause_sem_fault);
+
+    /* TODO: Tell source that "we are ready" */
+
+    return 0;
+}
+
 /**
  * Immediately following this command is a blob of data containing an embedded
  * chunk of migration stream; read it and load it.
@@ -1834,6 +1858,9 @@ static int loadvm_process_command(QEMUFile *f)
     case MIG_CMD_POSTCOPY_RAM_DISCARD:
         return loadvm_postcopy_ram_handle_discard(mis, len);
 
+    case MIG_CMD_POSTCOPY_RESUME:
+        return loadvm_postcopy_handle_resume(mis);
+
     case MIG_CMD_RECV_BITMAP:
         return loadvm_handle_recv_bitmap(mis, len);
     }
diff --git a/migration/savevm.h b/migration/savevm.h
index 8126b1c..a5f3879 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -46,6 +46,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
 void qemu_savevm_send_postcopy_advise(QEMUFile *f);
 void qemu_savevm_send_postcopy_listen(QEMUFile *f);
 void qemu_savevm_send_postcopy_run(QEMUFile *f);
+void qemu_savevm_send_postcopy_resume(QEMUFile *f);
 void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name);
 
 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
diff --git a/migration/trace-events b/migration/trace-events
index ed69551..04dd9d8 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -34,6 +34,7 @@ savevm_send_open_return_path(void) ""
 savevm_send_ping(uint32_t val) "%x"
 savevm_send_postcopy_listen(void) ""
 savevm_send_postcopy_run(void) ""
+savevm_send_postcopy_resume(void) ""
 savevm_send_recv_bitmap(char *name) "%s"
 savevm_state_setup(void) ""
 savevm_state_header(void) ""
-- 
2.7.4

Re: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Dr. David Alan Gilbert 8 years, 6 months ago

* Peter Xu (peterx@redhat.com) wrote:
> Introducing this new command to be sent when the source VM is ready to
> resume the paused migration.  What the destination does here is
> basically release the fault thread to continue service page faults.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/savevm.c     | 27 +++++++++++++++++++++++++++
>  migration/savevm.h     |  1 +
>  migration/trace-events |  1 +
>  3 files changed, 29 insertions(+)
> 
> diff --git a/migration/savevm.c b/migration/savevm.c
> index def9213..2e330bc 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -77,6 +77,7 @@ enum qemu_vm_cmd {
>      MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
>                                        were previously sent during
>                                        precopy but are dirty. */
> +    MIG_CMD_POSTCOPY_RESUME,       /* resume postcopy on dest */
>      MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
>      MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
>      MIG_CMD_MAX
> @@ -95,6 +96,7 @@ static struct mig_cmd_args {
>      [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
>      [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
>                                     .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
> +    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
>      [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
>      [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
>      [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
> @@ -931,6 +933,12 @@ void qemu_savevm_send_postcopy_run(QEMUFile *f)
>      qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
>  }
>  
> +void qemu_savevm_send_postcopy_resume(QEMUFile *f)
> +{
> +    trace_savevm_send_postcopy_resume();
> +    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
> +}
> +
>  void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
>  {
>      size_t len;
> @@ -1671,6 +1679,22 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
>      return LOADVM_QUIT;
>  }
>  
> +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> +{
> +    /*
> +     * This means source VM is ready to resume the postcopy migration.
> +     * It's time to switch state and release the fault thread to
> +     * continue service page faults.
> +     */
> +    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
> +                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
> +    qemu_sem_post(&mis->postcopy_pause_sem_fault);

Is it worth sanity checking that you were in RECOVER at this point?

Dave

> +
> +    /* TODO: Tell source that "we are ready" */
> +
> +    return 0;
> +}
> +
>  /**
>   * Immediately following this command is a blob of data containing an embedded
>   * chunk of migration stream; read it and load it.
> @@ -1834,6 +1858,9 @@ static int loadvm_process_command(QEMUFile *f)
>      case MIG_CMD_POSTCOPY_RAM_DISCARD:
>          return loadvm_postcopy_ram_handle_discard(mis, len);
>  
> +    case MIG_CMD_POSTCOPY_RESUME:
> +        return loadvm_postcopy_handle_resume(mis);
> +
>      case MIG_CMD_RECV_BITMAP:
>          return loadvm_handle_recv_bitmap(mis, len);
>      }
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 8126b1c..a5f3879 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -46,6 +46,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
>  void qemu_savevm_send_postcopy_advise(QEMUFile *f);
>  void qemu_savevm_send_postcopy_listen(QEMUFile *f);
>  void qemu_savevm_send_postcopy_run(QEMUFile *f);
> +void qemu_savevm_send_postcopy_resume(QEMUFile *f);
>  void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name);
>  
>  void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
> diff --git a/migration/trace-events b/migration/trace-events
> index ed69551..04dd9d8 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -34,6 +34,7 @@ savevm_send_open_return_path(void) ""
>  savevm_send_ping(uint32_t val) "%x"
>  savevm_send_postcopy_listen(void) ""
>  savevm_send_postcopy_run(void) ""
> +savevm_send_postcopy_resume(void) ""
>  savevm_send_recv_bitmap(char *name) "%s"
>  savevm_state_setup(void) ""
>  savevm_state_header(void) ""
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Peter Xu 8 years, 6 months ago

On Thu, Aug 03, 2017 at 12:05:41PM +0100, Dr. David Alan Gilbert wrote:

[...]

> > +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> > +{
> > +    /*
> > +     * This means source VM is ready to resume the postcopy migration.
> > +     * It's time to switch state and release the fault thread to
> > +     * continue service page faults.
> > +     */
> > +    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
> > +                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
> > +    qemu_sem_post(&mis->postcopy_pause_sem_fault);
> 
> Is it worth sanity checking that you were in RECOVER at this point?

Yeah, it never hurts.  Will do.

-- 
Peter Xu

Re: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Peter Xu 8 years, 6 months ago

On Fri, Aug 04, 2017 at 03:04:19PM +0800, Peter Xu wrote:
> On Thu, Aug 03, 2017 at 12:05:41PM +0100, Dr. David Alan Gilbert wrote:
> 
> [...]
> 
> > > +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> > > +{
> > > +    /*
> > > +     * This means source VM is ready to resume the postcopy migration.
> > > +     * It's time to switch state and release the fault thread to
> > > +     * continue service page faults.
> > > +     */
> > > +    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
> > > +                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
> > > +    qemu_sem_post(&mis->postcopy_pause_sem_fault);
> > 
> > Is it worth sanity checking that you were in RECOVER at this point?
> 
> Yeah, it never hurts.  Will do.

Not sure whether this would be good (note: I returned 0 in the if):

diff --git a/migration/savevm.c b/migration/savevm.c
index b7843c2..b34f59b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1709,6 +1709,12 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
 
 static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
 {
+    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
+        error_report("%s: illegal resume received", __func__);
+        /* Don't fail the load, only for this. */
+        return 0;
+    }
+
     /*
      * This means source VM is ready to resume the postcopy migration.
      * It's time to switch state and release the fault thread to

Basically I just don't want to crash the dest VM (it holds hot dirty
pages) even if it receives a faulty RESUME command.

-- 
Peter Xu

Re: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Dr. David Alan Gilbert 8 years, 6 months ago

* Peter Xu (peterx@redhat.com) wrote:
> On Fri, Aug 04, 2017 at 03:04:19PM +0800, Peter Xu wrote:
> > On Thu, Aug 03, 2017 at 12:05:41PM +0100, Dr. David Alan Gilbert wrote:
> > 
> > [...]
> > 
> > > > +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> > > > +{
> > > > +    /*
> > > > +     * This means source VM is ready to resume the postcopy migration.
> > > > +     * It's time to switch state and release the fault thread to
> > > > +     * continue service page faults.
> > > > +     */
> > > > +    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
> > > > +                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
> > > > +    qemu_sem_post(&mis->postcopy_pause_sem_fault);
> > > 
> > > Is it worth sanity checking that you were in RECOVER at this point?
> > 
> > Yeah, it never hurts.  Will do.
> 
> Not sure whether this would be good (note: I returned 0 in the if):
> 
> diff --git a/migration/savevm.c b/migration/savevm.c
> index b7843c2..b34f59b 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1709,6 +1709,12 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
>  
>  static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
>  {
> +    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
> +        error_report("%s: illegal resume received", __func__);
> +        /* Don't fail the load, only for this. */
> +        return 0;
> +    }
> +
>      /*
>       * This means source VM is ready to resume the postcopy migration.
>       * It's time to switch state and release the fault thread to
> 
> Basically I just don't want to crash the dest VM (it holds hot dirty
> pages) even if it receives a faulty RESUME command.

Yes, so now that's a fun problem; effectively you then have 3 valid
failure modes:
    a) An IO failure so we need to go into POSTCOPY_PAUSE
    b) A fatal migration stream problem to quit
    c) A non-fatal migration stream problem to go .. back into PAUSE?

Dave

> -- 
> Peter Xu
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME

Posted by Peter Xu 8 years, 6 months ago

On Fri, Aug 04, 2017 at 09:30:01AM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > On Fri, Aug 04, 2017 at 03:04:19PM +0800, Peter Xu wrote:
> > > On Thu, Aug 03, 2017 at 12:05:41PM +0100, Dr. David Alan Gilbert wrote:
> > > 
> > > [...]
> > > 
> > > > > +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> > > > > +{
> > > > > +    /*
> > > > > +     * This means source VM is ready to resume the postcopy migration.
> > > > > +     * It's time to switch state and release the fault thread to
> > > > > +     * continue service page faults.
> > > > > +     */
> > > > > +    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
> > > > > +                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
> > > > > +    qemu_sem_post(&mis->postcopy_pause_sem_fault);
> > > > 
> > > > Is it worth sanity checking that you were in RECOVER at this point?
> > > 
> > > Yeah, it never hurts.  Will do.
> > 
> > Not sure whether this would be good (note: I returned 0 in the if):
> > 
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index b7843c2..b34f59b 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1709,6 +1709,12 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
> >  
> >  static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> >  {
> > +    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
> > +        error_report("%s: illegal resume received", __func__);
> > +        /* Don't fail the load, only for this. */
> > +        return 0;
> > +    }
> > +
> >      /*
> >       * This means source VM is ready to resume the postcopy migration.
> >       * It's time to switch state and release the fault thread to
> > 
> > Basically I just don't want to crash the dest VM (it holds hot dirty
> > pages) even if it receives a faulty RESUME command.
> 
> Yes, so now that's a fun problem; effectively you then have 3 valid
> failure modes:
>     a) An IO failure so we need to go into POSTCOPY_PAUSE
>     b) A fatal migration stream problem to quit
>     c) A non-fatal migration stream problem to go .. back into PAUSE?

Hmm yes...

So I have at least three TODO items now:

- support manual switch source into PAUSED state
- support migrate_cancel during PAUSED/RECOVER state
- when anything goes wrong during PAUSED/RECOVER state, switch back to
  PAUSED state on both sides

It just depends on whether we would like to postpone this work, or whether
we think any of these items is essential even for the first version.

IMHO we can postpone this 3rd one as well.

-- 
Peter Xu