[Qemu-devel] [RFC 11/29] migration: new postcopy-pause state

Peter Xu posted 29 patches 8 years, 6 months ago
There is a newer version of this series
[Qemu-devel] [RFC 11/29] migration: new postcopy-pause state
Posted by Peter Xu 8 years, 6 months ago
Introducing a new state "postcopy-paused", which can be used to pause a
postcopy migration. It is targeted to support network failures during
postcopy migration. Now when network down for postcopy, the source side
will not fail the migration. Instead we convert the status into this new
paused state, and we will try to wait for a rescue in the future.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.c  | 78 +++++++++++++++++++++++++++++++++++++++++++++++---
 migration/migration.h  |  3 ++
 migration/trace-events |  1 +
 qapi-schema.json       |  5 +++-
 4 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index efee87e..0bc70c8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -470,6 +470,7 @@ static bool migration_is_setup_or_active(int state)
     switch (state) {
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_PAUSED:
     case MIGRATION_STATUS_SETUP:
         return true;
 
@@ -545,6 +546,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_CANCELLING:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_PAUSED:
          /* TODO add some postcopy stats */
         info->has_status = true;
         info->has_total_time = true;
@@ -991,6 +993,8 @@ static void migrate_fd_cleanup(void *opaque)
 
     notifier_list_notify(&migration_state_notifiers, s);
     block_cleanup_parameters(s);
+
+    qemu_sem_destroy(&s->postcopy_pause_sem);
 }
 
 void migrate_fd_error(MigrationState *s, const Error *error)
@@ -1134,6 +1138,7 @@ MigrationState *migrate_init(void)
     s->migration_thread_running = false;
     error_free(s->error);
     s->error = NULL;
+    qemu_sem_init(&s->postcopy_pause_sem, 0);
 
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
@@ -1942,6 +1947,69 @@ static bool postcopy_should_start(MigrationState *s)
 }
 
 /*
+ * We don't return until we are in a safe state to continue current
+ * postcopy migration.  Returns true to continue the migration, or
+ * false to terminate current migration.
+ */
+static bool postcopy_pause(MigrationState *s)
+{
+    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+                      MIGRATION_STATUS_POSTCOPY_PAUSED);
+
+    /* Current channel is possibly broken. Release it. */
+    assert(s->to_dst_file);
+    qemu_file_shutdown(s->to_dst_file);
+    qemu_fclose(s->to_dst_file);
+    s->to_dst_file = NULL;
+
+    /*
+     * We wait until things fixed up. Then someone will setup the
+     * status back for us.
+     */
+    while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+        qemu_sem_wait(&s->postcopy_pause_sem);
+    }
+
+    trace_postcopy_pause_continued();
+
+    return true;
+}
+
+/* Return true if we want to stop the migration, otherwise false. */
+static bool migration_detect_error(MigrationState *s)
+{
+    int ret;
+
+    /* Try to detect any file errors */
+    ret = qemu_file_get_error(s->to_dst_file);
+
+    if (!ret) {
+        /* Everything is fine */
+        return false;
+    }
+
+    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
+        /*
+         * For postcopy, we allow the network to be down for a
+         * while. After that, it can be continued by a
+         * recovery phase.
+         */
+        return !postcopy_pause(s);
+    } else {
+        /*
+         * For precopy (or postcopy with error outside IO), we fail
+         * with no time.
+         */
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+        trace_migration_thread_file_err();
+
+        /* Time to stop the migration, now. */
+        return true;
+    }
+}
+
+/*
  * Master migration thread on the source VM.
  * It drives the migration and pumps the data down the outgoing channel.
  */
@@ -2037,12 +2105,14 @@ static void *migration_thread(void *opaque)
             }
         }
 
-        if (qemu_file_get_error(s->to_dst_file)) {
-            migrate_set_state(&s->state, current_active_state,
-                              MIGRATION_STATUS_FAILED);
-            trace_migration_thread_file_err();
+        /*
+         * Try to detect any kind of failures, and see whether we
+         * should stop the migration now.
+         */
+        if (migration_detect_error(s)) {
             break;
         }
+
         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
         if (current_time >= initial_time + BUFFER_DELAY) {
             uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
diff --git a/migration/migration.h b/migration/migration.h
index e902bae..24cdaf6 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -151,6 +151,9 @@ struct MigrationState
     bool send_configuration;
     /* Whether we send section footer during migration */
     bool send_section_footer;
+
+    /* Needed by postcopy-pause state */
+    QemuSemaphore postcopy_pause_sem;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/migration/trace-events b/migration/trace-events
index 08d00fa..2211acc 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -98,6 +98,7 @@ migration_thread_setup_complete(void) ""
 open_return_path_on_source(void) ""
 open_return_path_on_source_continue(void) ""
 postcopy_start(void) ""
+postcopy_pause_continued(void) ""
 postcopy_start_set_run(void) ""
 source_return_path_thread_bad_end(void) ""
 source_return_path_thread_end(void) ""
diff --git a/qapi-schema.json b/qapi-schema.json
index 9c6c3e1..2a36b80 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -667,6 +667,8 @@
 #
 # @postcopy-active: like active, but now in postcopy mode. (since 2.5)
 #
+# @postcopy-paused: during postcopy but paused. (since 2.10)
+#
 # @completed: migration is finished.
 #
 # @failed: some error occurred during migration process.
@@ -679,7 +681,8 @@
 ##
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
-            'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
+            'active', 'postcopy-active', 'postcopy-paused',
+            'completed', 'failed', 'colo' ] }
 
 ##
 # @MigrationInfo:
-- 
2.7.4


Re: [Qemu-devel] [RFC 11/29] migration: new postcopy-pause state
Posted by Eric Blake 8 years, 6 months ago
On 07/28/2017 03:06 AM, Peter Xu wrote:
> Introducing a new state "postcopy-paused", which can be used to pause a
> postcopy migration. It is targeted to support network failures during
> postcopy migration. Now when network down for postcopy, the source side
> will not fail the migration. Instead we convert the status into this new
> paused state, and we will try to wait for a rescue in the future.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---

You might want to use scripts/git.orderfile to put .json changes early
in your diffs (interface before implementation makes for easier reviews).

> +++ b/qapi-schema.json
> @@ -667,6 +667,8 @@
>  #
>  # @postcopy-active: like active, but now in postcopy mode. (since 2.5)
>  #
> +# @postcopy-paused: during postcopy but paused. (since 2.10)
> +#

You've missed 2.10; this should be 2.11.  Can this state occur without
any explicit request (ie. old clients may be confused by it), or do you
have to opt-in to a specific migration parameter to inform qemu that you
are aware of how to handle this state?

>  # @completed: migration is finished.
>  #
>  # @failed: some error occurred during migration process.
> @@ -679,7 +681,8 @@
>  ##
>  { 'enum': 'MigrationStatus',
>    'data': [ 'none', 'setup', 'cancelling', 'cancelled',
> -            'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
> +            'active', 'postcopy-active', 'postcopy-paused',
> +            'completed', 'failed', 'colo' ] }
>  
>  ##
>  # @MigrationInfo:
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [RFC 11/29] migration: new postcopy-pause state
Posted by Peter Xu 8 years, 6 months ago
On Fri, Jul 28, 2017 at 10:53:00AM -0500, Eric Blake wrote:
> On 07/28/2017 03:06 AM, Peter Xu wrote:
> > Introducing a new state "postcopy-paused", which can be used to pause a
> > postcopy migration. It is targeted to support network failures during
> > postcopy migration. Now when network down for postcopy, the source side
> > will not fail the migration. Instead we convert the status into this new
> > paused state, and we will try to wait for a rescue in the future.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> 
> You might want to use scripts/git.orderfile to put .json changes early
> in your diffs (interface before implementation makes for easier reviews).

Will do.

> 
> > +++ b/qapi-schema.json
> > @@ -667,6 +667,8 @@
> >  #
> >  # @postcopy-active: like active, but now in postcopy mode. (since 2.5)
> >  #
> > +# @postcopy-paused: during postcopy but paused. (since 2.10)
> > +#
> 
> You've missed 2.10; this should be 2.11.

Definitely. It should be for 2.11.

> Can this state occur without
> any explicit request (ie. old clients may be confused by it), or do you
> have to opt-in to a specific migration parameter to inform qemu that you
> are aware of how to handle this state?

Yes, it can occur automatically, without any operation from user's
side. And it does not have a tunable to switch it on/off - it'll
always be on (that's my plan, though), since IMHO holding at a paused
state is always better than crashing the source directly (that's what
we do now - when postcopy encountered network failure, the VM will
crash and data will be lost).

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [RFC 11/29] migration: new postcopy-pause state
Posted by Dr. David Alan Gilbert 8 years, 6 months ago
* Peter Xu (peterx@redhat.com) wrote:
> Introducing a new state "postcopy-paused", which can be used to pause a
> postcopy migration. It is targeted to support network failures during
> postcopy migration. Now when network down for postcopy, the source side
> will not fail the migration. Instead we convert the status into this new
> paused state, and we will try to wait for a rescue in the future.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

I think this should probably be split into:
   a) A patch that adds a new state and the entries in query_migrate etc
   b) A patch that wires up the semaphore and the use of the state.

> ---
>  migration/migration.c  | 78 +++++++++++++++++++++++++++++++++++++++++++++++---
>  migration/migration.h  |  3 ++
>  migration/trace-events |  1 +
>  qapi-schema.json       |  5 +++-
>  4 files changed, 82 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index efee87e..0bc70c8 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -470,6 +470,7 @@ static bool migration_is_setup_or_active(int state)
>      switch (state) {
>      case MIGRATION_STATUS_ACTIVE:
>      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
>      case MIGRATION_STATUS_SETUP:
>          return true;
>  
> @@ -545,6 +546,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>      case MIGRATION_STATUS_ACTIVE:
>      case MIGRATION_STATUS_CANCELLING:
>      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
>           /* TODO add some postcopy stats */
>          info->has_status = true;
>          info->has_total_time = true;
> @@ -991,6 +993,8 @@ static void migrate_fd_cleanup(void *opaque)
>  
>      notifier_list_notify(&migration_state_notifiers, s);
>      block_cleanup_parameters(s);
> +
> +    qemu_sem_destroy(&s->postcopy_pause_sem);
>  }
>  
>  void migrate_fd_error(MigrationState *s, const Error *error)
> @@ -1134,6 +1138,7 @@ MigrationState *migrate_init(void)
>      s->migration_thread_running = false;
>      error_free(s->error);
>      s->error = NULL;
> +    qemu_sem_init(&s->postcopy_pause_sem, 0);
>  
>      migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
>  
> @@ -1942,6 +1947,69 @@ static bool postcopy_should_start(MigrationState *s)
>  }
>  
>  /*
> + * We don't return until we are in a safe state to continue current
> + * postcopy migration.  Returns true to continue the migration, or
> + * false to terminate current migration.
> + */
> +static bool postcopy_pause(MigrationState *s)
> +{
> +    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

I never like asserts on the sending side.

> +    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
> +                      MIGRATION_STATUS_POSTCOPY_PAUSED);
> +
> +    /* Current channel is possibly broken. Release it. */
> +    assert(s->to_dst_file);
> +    qemu_file_shutdown(s->to_dst_file);
> +    qemu_fclose(s->to_dst_file);
> +    s->to_dst_file = NULL;

That does scare me a little; I think it's OK, I'm not sure what happens
to the ->from_dst_file fd and the return-path processing.

> +    /*
> +     * We wait until things fixed up. Then someone will setup the
> +     * status back for us.
> +     */
> +    while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
> +        qemu_sem_wait(&s->postcopy_pause_sem);
> +    }

Something should get written to stderr prior to this, so when we
find a migration apparently stuck we can tell why.

Dave

> +
> +    trace_postcopy_pause_continued();
> +
> +    return true;
> +}
> +
> +/* Return true if we want to stop the migration, otherwise false. */
> +static bool migration_detect_error(MigrationState *s)
> +{
> +    int ret;
> +
> +    /* Try to detect any file errors */
> +    ret = qemu_file_get_error(s->to_dst_file);
> +
> +    if (!ret) {
> +        /* Everything is fine */
> +        return false;
> +    }
> +
> +    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
> +        /*
> +         * For postcopy, we allow the network to be down for a
> +         * while. After that, it can be continued by a
> +         * recovery phase.
> +         */
> +        return !postcopy_pause(s);
> +    } else {
> +        /*
> +         * For precopy (or postcopy with error outside IO), we fail
> +         * with no time.
> +         */
> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> +        trace_migration_thread_file_err();
> +
> +        /* Time to stop the migration, now. */
> +        return true;
> +    }
> +}
> +
> +/*
>   * Master migration thread on the source VM.
>   * It drives the migration and pumps the data down the outgoing channel.
>   */
> @@ -2037,12 +2105,14 @@ static void *migration_thread(void *opaque)
>              }
>          }
>  
> -        if (qemu_file_get_error(s->to_dst_file)) {
> -            migrate_set_state(&s->state, current_active_state,
> -                              MIGRATION_STATUS_FAILED);
> -            trace_migration_thread_file_err();
> +        /*
> +         * Try to detect any kind of failures, and see whether we
> +         * should stop the migration now.
> +         */
> +        if (migration_detect_error(s)) {
>              break;
>          }
> +
>          current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>          if (current_time >= initial_time + BUFFER_DELAY) {
>              uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
> diff --git a/migration/migration.h b/migration/migration.h
> index e902bae..24cdaf6 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -151,6 +151,9 @@ struct MigrationState
>      bool send_configuration;
>      /* Whether we send section footer during migration */
>      bool send_section_footer;
> +
> +    /* Needed by postcopy-pause state */
> +    QemuSemaphore postcopy_pause_sem;
>  };
>  
>  void migrate_set_state(int *state, int old_state, int new_state);
> diff --git a/migration/trace-events b/migration/trace-events
> index 08d00fa..2211acc 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -98,6 +98,7 @@ migration_thread_setup_complete(void) ""
>  open_return_path_on_source(void) ""
>  open_return_path_on_source_continue(void) ""
>  postcopy_start(void) ""
> +postcopy_pause_continued(void) ""
>  postcopy_start_set_run(void) ""
>  source_return_path_thread_bad_end(void) ""
>  source_return_path_thread_end(void) ""
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 9c6c3e1..2a36b80 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -667,6 +667,8 @@
>  #
>  # @postcopy-active: like active, but now in postcopy mode. (since 2.5)
>  #
> +# @postcopy-paused: during postcopy but paused. (since 2.10)
> +#
>  # @completed: migration is finished.
>  #
>  # @failed: some error occurred during migration process.
> @@ -679,7 +681,8 @@
>  ##
>  { 'enum': 'MigrationStatus',
>    'data': [ 'none', 'setup', 'cancelling', 'cancelled',
> -            'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
> +            'active', 'postcopy-active', 'postcopy-paused',
> +            'completed', 'failed', 'colo' ] }
>  
>  ##
>  # @MigrationInfo:
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [Qemu-devel] [RFC 11/29] migration: new postcopy-pause state
Posted by Peter Xu 8 years, 6 months ago
On Mon, Jul 31, 2017 at 08:06:18PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > Introducing a new state "postcopy-paused", which can be used to pause a
> > postcopy migration. It is targeted to support network failures during
> > postcopy migration. Now when network down for postcopy, the source side
> > will not fail the migration. Instead we convert the status into this new
> > paused state, and we will try to wait for a rescue in the future.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> I think this should probably be split into:
>    a) A patch that adds a new state and the entries in query_migrate etc
>    b) A patch that wires up the semaphore and the use of the state.

Reasonable.  Let me split it.

> 
> > ---
> >  migration/migration.c  | 78 +++++++++++++++++++++++++++++++++++++++++++++++---
> >  migration/migration.h  |  3 ++
> >  migration/trace-events |  1 +
> >  qapi-schema.json       |  5 +++-
> >  4 files changed, 82 insertions(+), 5 deletions(-)
> > 
> > diff --git a/migration/migration.c b/migration/migration.c
> > index efee87e..0bc70c8 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -470,6 +470,7 @@ static bool migration_is_setup_or_active(int state)
> >      switch (state) {
> >      case MIGRATION_STATUS_ACTIVE:
> >      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> > +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
> >      case MIGRATION_STATUS_SETUP:
> >          return true;
> >  
> > @@ -545,6 +546,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
> >      case MIGRATION_STATUS_ACTIVE:
> >      case MIGRATION_STATUS_CANCELLING:
> >      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> > +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
> >           /* TODO add some postcopy stats */
> >          info->has_status = true;
> >          info->has_total_time = true;
> > @@ -991,6 +993,8 @@ static void migrate_fd_cleanup(void *opaque)
> >  
> >      notifier_list_notify(&migration_state_notifiers, s);
> >      block_cleanup_parameters(s);
> > +
> > +    qemu_sem_destroy(&s->postcopy_pause_sem);
> >  }
> >  
> >  void migrate_fd_error(MigrationState *s, const Error *error)
> > @@ -1134,6 +1138,7 @@ MigrationState *migrate_init(void)
> >      s->migration_thread_running = false;
> >      error_free(s->error);
> >      s->error = NULL;
> > +    qemu_sem_init(&s->postcopy_pause_sem, 0);
> >  
> >      migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
> >  
> > @@ -1942,6 +1947,69 @@ static bool postcopy_should_start(MigrationState *s)
> >  }
> >  
> >  /*
> > + * We don't return until we are in a safe state to continue current
> > + * postcopy migration.  Returns true to continue the migration, or
> > + * false to terminate current migration.
> > + */
> > +static bool postcopy_pause(MigrationState *s)
> > +{
> > +    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
> 
> I never like asserts on the sending side.

Indeed aborting on source side is dangerous (e.g., source may loss
data). However this is definitely a "valid assertion" - if current
state is not "postcopy-active", we should be in a very strange state.
If we just continue to run the latter codes, imho it is as dangerous
as if we assert() here and stop the program. Even, that may be more
dangerous considering that we don't really know what will happen
next...

> 
> > +    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
> > +                      MIGRATION_STATUS_POSTCOPY_PAUSED);
> > +
> > +    /* Current channel is possibly broken. Release it. */
> > +    assert(s->to_dst_file);
> > +    qemu_file_shutdown(s->to_dst_file);
> > +    qemu_fclose(s->to_dst_file);
> > +    s->to_dst_file = NULL;
> 
> That does scare me a little; I think it's OK, I'm not sure what happens
> to the ->from_dst_file fd and the return-path processing.

For sockets: I think the QIOChannelSocket.fd will be set to -1 during
close, then the return path code will not be able to read from that
channel any more (it'll get -EIO then as well), and it'll pause as
well. If it was blocking at recvmsg(), it should return with a
failure.

But yes, I think there may have possible indeed risk conditions
between the to/from QEMUFiles considering they are sharing the same
channel... Maybe that is a separate problem of "whether QIO channel
codes are thread safe"? I am not sure of it yet, otherwise we may need
some locking mechanism.

> 
> > +    /*
> > +     * We wait until things fixed up. Then someone will setup the
> > +     * status back for us.
> > +     */
> > +    while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
> > +        qemu_sem_wait(&s->postcopy_pause_sem);
> > +    }
> 
> Something should get written to stderr prior to this, so when we
> find a migration apparently stuck we can tell why.

Yes I think so.  Thanks,

-- 
Peter Xu