[RFC PATCH 12/25] migration: Fold migration_cleanup() into migration_connect_error_propagate()

Fabiano Rosas posted 25 patches 1 week, 4 days ago
Maintainers: Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Li Zhijian <lizhijian@fujitsu.com>
There is a newer version of this series
[RFC PATCH 12/25] migration: Fold migration_cleanup() into migration_connect_error_propagate()
Posted by Fabiano Rosas 1 week, 4 days ago
Whenever an error occurs between migrate_init() and the start of
migration_thread, do cleanup immediately after.

This allows the special casing for resume to be removed from
migration_connect(), that check is now done at
migration_connect_error_propagate() which already had a case for
resume.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 migration/migration.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 0f1644b276..a66b2d7aaf 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1576,15 +1576,21 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
 {
     MigrationStatus current = s->state;
     MigrationStatus next = MIGRATION_STATUS_NONE;
+    bool resume = false;
 
     switch (current) {
     case MIGRATION_STATUS_SETUP:
         next = MIGRATION_STATUS_FAILED;
         break;
 
+    case MIGRATION_STATUS_POSTCOPY_PAUSED:
+        resume = true;
+        break;
+
     case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
         /* Never fail a postcopy migration; switch back to PAUSED instead */
         next = MIGRATION_STATUS_POSTCOPY_PAUSED;
+        resume = true;
         break;
 
     case MIGRATION_STATUS_CANCELLING:
@@ -1609,6 +1615,10 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
     }
 
     migrate_error_propagate(s, error);
+
+    if (!resume) {
+        migration_cleanup(s);
+    }
 }
 
 void migration_cancel(void)
@@ -2209,12 +2219,19 @@ static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
                                       GIOCondition cond,
                                       void *opaque)
 {
+    MigrationState *s = migrate_get_current();
     MigrationAddress *addr = opaque;
+    Error *local_err = NULL;
+
+    qmp_migrate_finish(addr, &local_err);
+
+    if (local_err) {
+        migration_connect_error_propagate(s, local_err);
+    }
 
-    qmp_migrate_finish(addr, NULL);
 
     cpr_state_close();
-    migrate_hup_delete(migrate_get_current());
+    migrate_hup_delete(s);
     qapi_free_MigrationAddress(addr);
     return G_SOURCE_REMOVE;
 }
@@ -2223,7 +2240,6 @@ void qmp_migrate(const char *uri, bool has_channels,
                  MigrationChannelList *channels, bool has_detach, bool detach,
                  bool has_resume, bool resume, Error **errp)
 {
-    Error *local_err = NULL;
     MigrationState *s = migrate_get_current();
     g_autoptr(MigrationChannel) channel = NULL;
     MigrationAddress *addr = NULL;
@@ -2280,6 +2296,13 @@ void qmp_migrate(const char *uri, bool has_channels,
         return;
     }
 
+    /*
+     * The migrate_prepare() above calls migrate_init(). From this
+     * point on, until the end of migration, make sure any failures
+     * eventually result in a call to migration_cleanup().
+     */
+    Error *local_err = NULL;
+
     if (!cpr_state_save(cpr_channel, &local_err)) {
         goto out;
     }
@@ -2299,12 +2322,11 @@ void qmp_migrate(const char *uri, bool has_channels,
                         QAPI_CLONE(MigrationAddress, addr));
 
     } else {
-        qmp_migrate_finish(addr, errp);
+        qmp_migrate_finish(addr, &local_err);
     }
 
 out:
     if (local_err) {
-        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
         migration_connect_error_propagate(s, error_copy(local_err));
         error_propagate(errp, local_err);
     }
@@ -2335,12 +2357,6 @@ static void qmp_migrate_finish(MigrationAddress *addr, Error **errp)
     } else {
         error_setg(&local_err, "uri is not a valid migration protocol");
     }
-
-    if (local_err) {
-        migration_connect_error_propagate(s, error_copy(local_err));
-        error_propagate(errp, local_err);
-        return;
-    }
 }
 
 void qmp_migrate_cancel(Error **errp)
@@ -4027,9 +4043,6 @@ void migration_connect(MigrationState *s, Error *error_in)
     s->expected_downtime = migrate_downtime_limit();
     if (error_in) {
         migration_connect_error_propagate(s, error_in);
-        if (!resume) {
-            migration_cleanup(s);
-        }
         if (s->error) {
             error_report_err(error_copy(s->error));
         }
@@ -4108,7 +4121,6 @@ void migration_connect(MigrationState *s, Error *error_in)
 
 fail:
     migration_connect_error_propagate(s, local_err);
-    migration_cleanup(s);
     if (s->error) {
         error_report_err(error_copy(s->error));
     }
-- 
2.51.0
Re: [RFC PATCH 12/25] migration: Fold migration_cleanup() into migration_connect_error_propagate()
Posted by Peter Xu 1 week, 1 day ago
On Fri, Dec 26, 2025 at 06:19:14PM -0300, Fabiano Rosas wrote:
> Whenever an error occurs between migrate_init() and the start of
> migration_thread, do cleanup immediately after.
> 
> This allows the special casing for resume to be removed from
> migration_connect(), that check is now done at
> migration_connect_error_propagate() which already had a case for
> resume.
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

Didn't spot anything wrong,

Reviewed-by: Peter Xu <peterx@redhat.com>

One nitpick below,

> ---
>  migration/migration.c | 42 +++++++++++++++++++++++++++---------------
>  1 file changed, 27 insertions(+), 15 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 0f1644b276..a66b2d7aaf 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1576,15 +1576,21 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
>  {
>      MigrationStatus current = s->state;
>      MigrationStatus next = MIGRATION_STATUS_NONE;
> +    bool resume = false;
>  
>      switch (current) {
>      case MIGRATION_STATUS_SETUP:
>          next = MIGRATION_STATUS_FAILED;
>          break;
>  
> +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
> +        resume = true;
> +        break;
> +
>      case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
>          /* Never fail a postcopy migration; switch back to PAUSED instead */
>          next = MIGRATION_STATUS_POSTCOPY_PAUSED;
> +        resume = true;
>          break;
>  
>      case MIGRATION_STATUS_CANCELLING:
> @@ -1609,6 +1615,10 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
>      }
>  
>      migrate_error_propagate(s, error);
> +
> +    if (!resume) {
> +        migration_cleanup(s);
> +    }
>  }
>  
>  void migration_cancel(void)
> @@ -2209,12 +2219,19 @@ static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
>                                        GIOCondition cond,
>                                        void *opaque)
>  {
> +    MigrationState *s = migrate_get_current();
>      MigrationAddress *addr = opaque;
> +    Error *local_err = NULL;
> +
> +    qmp_migrate_finish(addr, &local_err);
> +
> +    if (local_err) {
> +        migration_connect_error_propagate(s, local_err);
> +    }
>  
> -    qmp_migrate_finish(addr, NULL);
>  
>      cpr_state_close();
> -    migrate_hup_delete(migrate_get_current());
> +    migrate_hup_delete(s);

IMHO we should drop these two lines.  For error cases, now they'll be done
in migration_cleanup() above.  Actually for success, it's the same, but in
the cleanup BH.

Maybe there're other cases where we can clean the code a bit on cpr;
there're codes that always does "if (xxx)" and calling them all over the
places, so it's easy to write such code when drafting a feature, but hard
to maintain, because it'll be obscure when it'll really trigger, like this
one.  We can leave the rest for later if there're applicable similar
cleanups.

>      qapi_free_MigrationAddress(addr);
>      return G_SOURCE_REMOVE;
>  }
> @@ -2223,7 +2240,6 @@ void qmp_migrate(const char *uri, bool has_channels,
>                   MigrationChannelList *channels, bool has_detach, bool detach,
>                   bool has_resume, bool resume, Error **errp)
>  {
> -    Error *local_err = NULL;
>      MigrationState *s = migrate_get_current();
>      g_autoptr(MigrationChannel) channel = NULL;
>      MigrationAddress *addr = NULL;
> @@ -2280,6 +2296,13 @@ void qmp_migrate(const char *uri, bool has_channels,
>          return;
>      }
>  
> +    /*
> +     * The migrate_prepare() above calls migrate_init(). From this
> +     * point on, until the end of migration, make sure any failures
> +     * eventually result in a call to migration_cleanup().
> +     */
> +    Error *local_err = NULL;
> +
>      if (!cpr_state_save(cpr_channel, &local_err)) {
>          goto out;
>      }
> @@ -2299,12 +2322,11 @@ void qmp_migrate(const char *uri, bool has_channels,
>                          QAPI_CLONE(MigrationAddress, addr));
>  
>      } else {
> -        qmp_migrate_finish(addr, errp);
> +        qmp_migrate_finish(addr, &local_err);
>      }
>  
>  out:
>      if (local_err) {
> -        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>          migration_connect_error_propagate(s, error_copy(local_err));
>          error_propagate(errp, local_err);
>      }
> @@ -2335,12 +2357,6 @@ static void qmp_migrate_finish(MigrationAddress *addr, Error **errp)
>      } else {
>          error_setg(&local_err, "uri is not a valid migration protocol");
>      }
> -
> -    if (local_err) {
> -        migration_connect_error_propagate(s, error_copy(local_err));
> -        error_propagate(errp, local_err);
> -        return;
> -    }
>  }
>  
>  void qmp_migrate_cancel(Error **errp)
> @@ -4027,9 +4043,6 @@ void migration_connect(MigrationState *s, Error *error_in)
>      s->expected_downtime = migrate_downtime_limit();
>      if (error_in) {
>          migration_connect_error_propagate(s, error_in);
> -        if (!resume) {
> -            migration_cleanup(s);
> -        }
>          if (s->error) {
>              error_report_err(error_copy(s->error));
>          }
> @@ -4108,7 +4121,6 @@ void migration_connect(MigrationState *s, Error *error_in)
>  
>  fail:
>      migration_connect_error_propagate(s, local_err);
> -    migration_cleanup(s);
>      if (s->error) {
>          error_report_err(error_copy(s->error));
>      }
> -- 
> 2.51.0
> 

-- 
Peter Xu
Re: [RFC PATCH 12/25] migration: Fold migration_cleanup() into migration_connect_error_propagate()
Posted by Fabiano Rosas 1 week, 1 day ago
Peter Xu <peterx@redhat.com> writes:

> On Fri, Dec 26, 2025 at 06:19:14PM -0300, Fabiano Rosas wrote:
>> Whenever an error occurs between migrate_init() and the start of
>> migration_thread, do cleanup immediately after.
>> 
>> This allows the special casing for resume to be removed from
>> migration_connect(), that check is now done at
>> migration_connect_error_propagate() which already had a case for
>> resume.
>> 
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>
> Didn't spot anything wrong,
>
> Reviewed-by: Peter Xu <peterx@redhat.com>
>
> One nitpick below,
>
>> ---
>>  migration/migration.c | 42 +++++++++++++++++++++++++++---------------
>>  1 file changed, 27 insertions(+), 15 deletions(-)
>> 
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 0f1644b276..a66b2d7aaf 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -1576,15 +1576,21 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
>>  {
>>      MigrationStatus current = s->state;
>>      MigrationStatus next = MIGRATION_STATUS_NONE;
>> +    bool resume = false;
>>  
>>      switch (current) {
>>      case MIGRATION_STATUS_SETUP:
>>          next = MIGRATION_STATUS_FAILED;
>>          break;
>>  
>> +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
>> +        resume = true;
>> +        break;
>> +
>>      case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
>>          /* Never fail a postcopy migration; switch back to PAUSED instead */
>>          next = MIGRATION_STATUS_POSTCOPY_PAUSED;
>> +        resume = true;
>>          break;
>>  
>>      case MIGRATION_STATUS_CANCELLING:
>> @@ -1609,6 +1615,10 @@ static void migration_connect_error_propagate(MigrationState *s, Error *error)
>>      }
>>  
>>      migrate_error_propagate(s, error);
>> +
>> +    if (!resume) {
>> +        migration_cleanup(s);
>> +    }
>>  }
>>  
>>  void migration_cancel(void)
>> @@ -2209,12 +2219,19 @@ static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
>>                                        GIOCondition cond,
>>                                        void *opaque)
>>  {
>> +    MigrationState *s = migrate_get_current();
>>      MigrationAddress *addr = opaque;
>> +    Error *local_err = NULL;
>> +
>> +    qmp_migrate_finish(addr, &local_err);
>> +
>> +    if (local_err) {
>> +        migration_connect_error_propagate(s, local_err);
>> +    }
>>  
>> -    qmp_migrate_finish(addr, NULL);
>>  
>>      cpr_state_close();
>> -    migrate_hup_delete(migrate_get_current());
>> +    migrate_hup_delete(s);
>
> IMHO we should drop these two lines.  For error cases, now they'll be done
> in migration_cleanup() above.  Actually for success, it's the same, but in
> the cleanup BH.
>

Hmm that would be good, I'll look into it.

> Maybe there're other cases where we can clean the code a bit on cpr;
> there're codes that always does "if (xxx)" and calling them all over the
> places, so it's easy to write such code when drafting a feature, but hard
> to maintain, because it'll be obscure when it'll really trigger, like this
> one.  We can leave the rest for later if there're applicable similar
> cleanups.
>
>>      qapi_free_MigrationAddress(addr);
>>      return G_SOURCE_REMOVE;
>>  }
>> @@ -2223,7 +2240,6 @@ void qmp_migrate(const char *uri, bool has_channels,
>>                   MigrationChannelList *channels, bool has_detach, bool detach,
>>                   bool has_resume, bool resume, Error **errp)
>>  {
>> -    Error *local_err = NULL;
>>      MigrationState *s = migrate_get_current();
>>      g_autoptr(MigrationChannel) channel = NULL;
>>      MigrationAddress *addr = NULL;
>> @@ -2280,6 +2296,13 @@ void qmp_migrate(const char *uri, bool has_channels,
>>          return;
>>      }
>>  
>> +    /*
>> +     * The migrate_prepare() above calls migrate_init(). From this
>> +     * point on, until the end of migration, make sure any failures
>> +     * eventually result in a call to migration_cleanup().
>> +     */
>> +    Error *local_err = NULL;
>> +
>>      if (!cpr_state_save(cpr_channel, &local_err)) {
>>          goto out;
>>      }
>> @@ -2299,12 +2322,11 @@ void qmp_migrate(const char *uri, bool has_channels,
>>                          QAPI_CLONE(MigrationAddress, addr));
>>  
>>      } else {
>> -        qmp_migrate_finish(addr, errp);
>> +        qmp_migrate_finish(addr, &local_err);
>>      }
>>  
>>  out:
>>      if (local_err) {
>> -        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>>          migration_connect_error_propagate(s, error_copy(local_err));
>>          error_propagate(errp, local_err);
>>      }
>> @@ -2335,12 +2357,6 @@ static void qmp_migrate_finish(MigrationAddress *addr, Error **errp)
>>      } else {
>>          error_setg(&local_err, "uri is not a valid migration protocol");
>>      }
>> -
>> -    if (local_err) {
>> -        migration_connect_error_propagate(s, error_copy(local_err));
>> -        error_propagate(errp, local_err);
>> -        return;
>> -    }
>>  }
>>  
>>  void qmp_migrate_cancel(Error **errp)
>> @@ -4027,9 +4043,6 @@ void migration_connect(MigrationState *s, Error *error_in)
>>      s->expected_downtime = migrate_downtime_limit();
>>      if (error_in) {
>>          migration_connect_error_propagate(s, error_in);
>> -        if (!resume) {
>> -            migration_cleanup(s);
>> -        }
>>          if (s->error) {
>>              error_report_err(error_copy(s->error));
>>          }
>> @@ -4108,7 +4121,6 @@ void migration_connect(MigrationState *s, Error *error_in)
>>  
>>  fail:
>>      migration_connect_error_propagate(s, local_err);
>> -    migration_cleanup(s);
>>      if (s->error) {
>>          error_report_err(error_copy(s->error));
>>      }
>> -- 
>> 2.51.0
>>