[PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event

Roman Khapov posted 2 patches 9 months, 2 weeks ago
Maintainers: Hailiang Zhang <zhanghailiang@xfusion.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
[PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Roman Khapov 9 months, 2 weeks ago
This commit adds the optional field reason for the events, which
contains the string, describing reason of status changing.
For example: reason of migration fail.

Function migrate_set_state now accepts 4th argument: the reason to
pass to event. Every call of this function appended with NULL argument.

Also migrate_set_state_err_reason was added to form reason from Error*

Signed-off-by: Roman Khapov <rkhapov@yandex-team.ru>
---
 migration/colo.c      |  6 +--
 migration/migration.c | 86 ++++++++++++++++++++++++++-----------------
 migration/migration.h |  5 ++-
 migration/multifd.c   |  6 +--
 migration/savevm.c    | 14 +++----
 qapi/migration.json   |  3 +-
 6 files changed, 71 insertions(+), 49 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index 315e31fe32..3c95291a83 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -102,7 +102,7 @@ static void secondary_vm_do_failover(void)
     }
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
-                      MIGRATION_STATUS_COMPLETED);
+                      MIGRATION_STATUS_COMPLETED, NULL);
 
     replication_stop_all(true, &local_err);
     if (local_err) {
@@ -157,7 +157,7 @@ static void primary_vm_do_failover(void)
     Error *local_err = NULL;
 
     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
-                      MIGRATION_STATUS_COMPLETED);
+                      MIGRATION_STATUS_COMPLETED, NULL);
     /*
      * kick COLO thread which might wait at
      * qemu_sem_wait(&s->colo_checkpoint_sem).
@@ -823,7 +823,7 @@ static void *colo_process_incoming_thread(void *opaque)
     qemu_sem_init(&mis->colo_incoming_sem, 0);
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                      MIGRATION_STATUS_COLO);
+                      MIGRATION_STATUS_COLO, NULL);
 
     if (get_colo_mode() != COLO_MODE_SECONDARY) {
         error_report("COLO mode must be COLO_MODE_SECONDARY");
diff --git a/migration/migration.c b/migration/migration.c
index ab21de2cad..d28885a55b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -354,10 +354,10 @@ void migration_incoming_state_destroy(void)
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
 }
 
-static void migrate_generate_event(int new_state)
+static void migrate_generate_event(int new_state, const char *reason)
 {
     if (migrate_events()) {
-        qapi_event_send_migration(new_state);
+        qapi_event_send_migration(new_state, reason);
     }
 }
 
@@ -598,7 +598,7 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
     }
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
-                      MIGRATION_STATUS_SETUP);
+                      MIGRATION_STATUS_SETUP, NULL);
 
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
         SocketAddress *saddr = &addr->u.socket;
@@ -692,7 +692,7 @@ static void process_incoming_migration_bh(void *opaque)
      * it's ready to use.
      */
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                      MIGRATION_STATUS_COMPLETED);
+                      MIGRATION_STATUS_COMPLETED, NULL);
     migration_incoming_state_destroy();
 }
 
@@ -713,7 +713,7 @@ process_incoming_migration_co(void *opaque)
     mis->largest_page_size = qemu_ram_pagesize_largest();
     postcopy_state_set(POSTCOPY_INCOMING_NONE);
     migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
-                      MIGRATION_STATUS_ACTIVE);
+                      MIGRATION_STATUS_ACTIVE, NULL);
 
     mis->loadvm_co = qemu_coroutine_self();
     ret = qemu_loadvm_state(mis->from_src_file);
@@ -762,7 +762,7 @@ process_incoming_migration_co(void *opaque)
     return;
 fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                      MIGRATION_STATUS_FAILED);
+                      MIGRATION_STATUS_FAILED, NULL);
     qemu_fclose(mis->from_src_file);
 
     multifd_recv_cleanup();
@@ -808,7 +808,7 @@ static bool postcopy_try_recover(void)
         mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
 
         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
-                          MIGRATION_STATUS_POSTCOPY_RECOVER);
+                          MIGRATION_STATUS_POSTCOPY_RECOVER, NULL);
 
         /*
          * Here, we only wake up the main loading thread (while the
@@ -1308,15 +1308,27 @@ void qmp_migrate_start_postcopy(Error **errp)
 
 /* shared migration helpers */
 
-void migrate_set_state(int *state, int old_state, int new_state)
+void migrate_set_state(int *state, int old_state,
+                       int new_state, const char *reason)
 {
     assert(new_state < MIGRATION_STATUS__MAX);
     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
         trace_migrate_set_state(MigrationStatus_str(new_state));
-        migrate_generate_event(new_state);
+        migrate_generate_event(new_state, reason);
     }
 }
 
+void migrate_set_state_err_reason(int *state, int old_state,
+                                  int new_state, const Error *err)
+{
+    const char *reason = NULL;
+    if (likely(err != NULL)) {
+        reason = error_get_pretty(err);
+    }
+
+    migrate_set_state(state, old_state, new_state, reason);
+}
+
 static void migrate_fd_cleanup(MigrationState *s)
 {
     g_free(s->hostname);
@@ -1360,7 +1372,7 @@ static void migrate_fd_cleanup(MigrationState *s)
 
     if (s->state == MIGRATION_STATUS_CANCELLING) {
         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
-                          MIGRATION_STATUS_CANCELLED);
+                          MIGRATION_STATUS_CANCELLED, NULL);
     }
 
     if (s->error) {
@@ -1406,7 +1418,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
     trace_migrate_fd_error(error_get_pretty(error));
     assert(s->to_dst_file == NULL);
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                      MIGRATION_STATUS_FAILED);
+                      MIGRATION_STATUS_FAILED, NULL);
     migrate_set_error(s, error);
 }
 
@@ -1432,7 +1444,8 @@ static void migrate_fd_cancel(MigrationState *s)
         if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
             qemu_sem_post(&s->pause_sem);
         }
-        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
+        migrate_set_state(&s->state, old_state,
+                          MIGRATION_STATUS_CANCELLING, NULL);
     } while (s->state != MIGRATION_STATUS_CANCELLING);
 
     /*
@@ -1612,7 +1625,8 @@ int migrate_init(MigrationState *s, Error **errp)
     s->error = NULL;
     s->vmdesc = NULL;
 
-    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
+    migrate_set_state(&s->state, MIGRATION_STATUS_NONE,
+                      MIGRATION_STATUS_SETUP, NULL);
 
     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     s->total_time = 0;
@@ -2024,7 +2038,7 @@ void qmp_migrate(const char *uri, bool has_channels,
         error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
                    "a valid migration protocol");
         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                          MIGRATION_STATUS_FAILED);
+                          MIGRATION_STATUS_FAILED, NULL);
         block_cleanup_parameters();
     }
 
@@ -2145,7 +2159,7 @@ static bool migrate_handle_rp_resume_ack(MigrationState *s,
 
     /* Now both sides are active. */
     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
-                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE, NULL);
 
     /* Notify send thread that time to continue send pages */
     migration_rp_kick(s);
@@ -2422,14 +2436,15 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     if (migrate_postcopy_preempt()) {
         migration_wait_main_channel(ms);
         if (postcopy_preempt_establish_channel(ms)) {
-            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+            migrate_set_state(&ms->state, ms->state,
+                              MIGRATION_STATUS_FAILED, NULL);
             return -1;
         }
     }
 
     if (!migrate_pause_before_switchover()) {
         migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
+                          MIGRATION_STATUS_POSTCOPY_ACTIVE, NULL);
     }
 
     trace_postcopy_start();
@@ -2559,7 +2574,7 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     if (ret) {
         error_setg(errp, "postcopy_start: Migration stream errored");
         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                              MIGRATION_STATUS_FAILED);
+                          MIGRATION_STATUS_FAILED, NULL);
     }
 
     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
@@ -2570,7 +2585,7 @@ fail_closefb:
     qemu_fclose(fb);
 fail:
     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                          MIGRATION_STATUS_FAILED);
+                      MIGRATION_STATUS_FAILED, NULL);
     if (restart_block) {
         /* A failure happened early enough that we know the destination hasn't
          * accessed block devices, so we're safe to recover.
@@ -2618,10 +2633,10 @@ static int migration_maybe_pause(MigrationState *s,
     if (s->state != MIGRATION_STATUS_CANCELLING) {
         bql_unlock();
         migrate_set_state(&s->state, *current_active_state,
-                          MIGRATION_STATUS_PRE_SWITCHOVER);
+                          MIGRATION_STATUS_PRE_SWITCHOVER, NULL);
         qemu_sem_wait(&s->pause_sem);
         migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
-                          new_state);
+                          new_state, NULL);
         *current_active_state = new_state;
         bql_lock();
     }
@@ -2706,7 +2721,7 @@ static void migration_completion_failed(MigrationState *s,
     }
 
     migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
+                      MIGRATION_STATUS_FAILED, NULL);
 }
 
 /**
@@ -2744,10 +2759,10 @@ static void migration_completion(MigrationState *s)
     if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
         /* COLO does not support postcopy */
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_COLO);
+                          MIGRATION_STATUS_COLO, NULL);
     } else {
         migrate_set_state(&s->state, current_active_state,
-                          MIGRATION_STATUS_COMPLETED);
+                          MIGRATION_STATUS_COMPLETED, NULL);
     }
 
     return;
@@ -2785,12 +2800,12 @@ static void bg_migration_completion(MigrationState *s)
     }
 
     migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_COMPLETED);
+                      MIGRATION_STATUS_COMPLETED, NULL);
     return;
 
 fail:
     migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
+                      MIGRATION_STATUS_FAILED, NULL);
 }
 
 typedef enum MigThrError {
@@ -2901,7 +2916,7 @@ static MigThrError postcopy_pause(MigrationState *s)
         close_return_path_on_source(s);
 
         migrate_set_state(&s->state, s->state,
-                          MIGRATION_STATUS_POSTCOPY_PAUSED);
+                          MIGRATION_STATUS_POSTCOPY_PAUSED, NULL);
 
         error_report("Detected IO failure for postcopy. "
                      "Migration paused.");
@@ -2979,7 +2994,7 @@ static MigThrError migration_detect_error(MigrationState *s)
          * For precopy (or postcopy with error outside IO), we fail
          * with no time.
          */
-        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
+        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED, NULL);
         trace_migration_thread_file_err();
 
         /* Time to stop the migration, now. */
@@ -3279,7 +3294,8 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                     int new_state)
 {
     if (qemu_savevm_state_guest_unplug_pending()) {
-        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
+        migrate_set_state(&s->state, old_state,
+                          MIGRATION_STATUS_WAIT_UNPLUG, NULL);
 
         while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
                qemu_savevm_state_guest_unplug_pending()) {
@@ -3302,9 +3318,10 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
             }
         }
 
-        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
+        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+                          new_state, NULL);
     } else {
-        migrate_set_state(&s->state, old_state, new_state);
+        migrate_set_state(&s->state, old_state, new_state, NULL);
     }
 }
 
@@ -3549,7 +3566,7 @@ static void *bg_migration_thread(void *opaque)
 fail:
     if (early_fail) {
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
-                MIGRATION_STATUS_FAILED);
+                          MIGRATION_STATUS_FAILED, NULL);
         bql_unlock();
     }
 
@@ -3615,7 +3632,8 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
     if (migrate_postcopy_ram() || migrate_return_path()) {
         if (open_return_path_on_source(s)) {
             error_setg(&local_err, "Unable to open return-path for postcopy");
-            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+            migrate_set_state(&s->state, s->state,
+                              MIGRATION_STATUS_FAILED, NULL);
             migrate_set_error(s, local_err);
             error_report_err(local_err);
             migrate_fd_cleanup(s);
@@ -3635,7 +3653,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
     if (resume) {
         /* Wakeup the main migration thread to do the recovery */
         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
-                          MIGRATION_STATUS_POSTCOPY_RECOVER);
+                          MIGRATION_STATUS_POSTCOPY_RECOVER, NULL);
         qemu_sem_post(&s->postcopy_pause_sem);
         return;
     }
diff --git a/migration/migration.h b/migration/migration.h
index f2c8b8f286..9150478654 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -468,7 +468,10 @@ struct MigrationState {
     bool rdma_migration;
 };
 
-void migrate_set_state(int *state, int old_state, int new_state);
+void migrate_set_state(int *state, int old_state,
+                       int new_state, const char *reason);
+void migrate_set_state_err_reason(int *state, int old_state,
+                                  int new_state, const Error *err);
 
 void migration_fd_process_incoming(QEMUFile *f);
 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
diff --git a/migration/multifd.c b/migration/multifd.c
index adfe8c9a0a..da3d397642 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -595,7 +595,7 @@ static void multifd_send_set_error(Error *err)
             s->state == MIGRATION_STATUS_DEVICE ||
             s->state == MIGRATION_STATUS_ACTIVE) {
             migrate_set_state(&s->state, s->state,
-                              MIGRATION_STATUS_FAILED);
+                              MIGRATION_STATUS_FAILED, NULL);
         }
     }
 }
@@ -1056,7 +1056,7 @@ bool multifd_send_setup(void)
         migrate_set_error(s, local_err);
         error_report_err(local_err);
         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                          MIGRATION_STATUS_FAILED);
+                          MIGRATION_STATUS_FAILED, NULL);
         return false;
     }
 
@@ -1087,7 +1087,7 @@ static void multifd_recv_terminate_threads(Error *err)
         if (s->state == MIGRATION_STATUS_SETUP ||
             s->state == MIGRATION_STATUS_ACTIVE) {
             migrate_set_state(&s->state, s->state,
-                              MIGRATION_STATUS_FAILED);
+                              MIGRATION_STATUS_FAILED, NULL);
         }
     }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index d612c8a902..be6cce8a51 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1744,7 +1744,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     } else {
         status = MIGRATION_STATUS_COMPLETED;
     }
-    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status, NULL);
 
     /* f is outer parameter, it should not stay in global migration state after
      * this function finished */
@@ -1999,7 +1999,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
     object_ref(OBJECT(migr));
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                                   MIGRATION_STATUS_POSTCOPY_ACTIVE);
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE, NULL);
     qemu_sem_post(&mis->thread_sync_sem);
     trace_postcopy_ram_listen_thread_start();
 
@@ -2037,7 +2037,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
         } else {
             error_report("%s: loadvm failed: %d", __func__, load_res);
             migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                                           MIGRATION_STATUS_FAILED);
+                              MIGRATION_STATUS_FAILED, NULL);
         }
     }
     if (load_res >= 0) {
@@ -2062,7 +2062,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
     }
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                                   MIGRATION_STATUS_COMPLETED);
+                      MIGRATION_STATUS_COMPLETED, NULL);
     /*
      * If everything has worked fine, then the main thread has waited
      * for us to start, and we're the last use of the mis.
@@ -2257,7 +2257,7 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
      * This means source VM is ready to resume the postcopy migration.
      */
     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
-                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE, NULL);
 
     trace_loadvm_postcopy_handle_resume();
 
@@ -2818,8 +2818,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
     }
 
     /* Current state can be either ACTIVE or RECOVER */
-    migrate_set_state(&mis->state, mis->state,
-                      MIGRATION_STATUS_POSTCOPY_PAUSED);
+    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+                      MIGRATION_STATUS_POSTCOPY_PAUSED, NULL);
 
     /* Notify the fault thread for the invalidated file handle */
     postcopy_fault_thread_notify(mis);
diff --git a/qapi/migration.json b/qapi/migration.json
index 5a565d9b8d..33bb5b7f50 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1392,6 +1392,7 @@
 # Emitted when a migration event happens
 #
 # @status: @MigrationStatus describing the current migration status.
+# @reason: Optional description of status changing reason.
 #
 # Since: 2.4
 #
@@ -1402,7 +1403,7 @@
 #     "data": {"status": "completed"} }
 ##
 { 'event': 'MIGRATION',
-  'data': {'status': 'MigrationStatus'}}
+  'data': {'status': 'MigrationStatus', '*reason': 'str'}}
 
 ##
 # @MIGRATION_PASS:
-- 
2.34.1
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Peter Xu 9 months, 1 week ago
On Thu, Feb 15, 2024 at 05:27:58PM +0500, Roman Khapov wrote:
>      migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
> -                      MIGRATION_STATUS_COMPLETED);
> +                      MIGRATION_STATUS_COMPLETED, NULL);

Instead of enforcing migrate_set_error() to always pass an error, maybe we
can allow migrate_generate_event() to fetch s->error in FAILED state, if
that hint ever existed?

-- 
Peter Xu
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Roman Khapov 9 months, 1 week ago
If I understood you right, you suggest to improve migrate_generate_event 
to accept MigrationState* instead of int* state (which is pointing to 
field MigrationState.state in all usages), and get error reason from 
MigrationState.error, if the new state is MIGRATION_STATE_FAILED, is it?

That sounds reasonable, thanks!

But I'm not sure if I got the idea of changing migrate_set_error 
correctly, can you explain in more details, please?

On 2/20/24 10:39, Peter Xu wrote:
> On Thu, Feb 15, 2024 at 05:27:58PM +0500, Roman Khapov wrote:
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
>> -                      MIGRATION_STATUS_COMPLETED);
>> +                      MIGRATION_STATUS_COMPLETED, NULL);
> Instead of enforcing migrate_set_error() to always pass an error, maybe we
> can allow migrate_generate_event() to fetch s->error in FAILED state, if
> that hint ever existed?
>
-- 
Best regards,
Roman Khapov
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Peter Xu 9 months, 1 week ago
On Wed, Feb 21, 2024 at 06:47:47PM +0500, Roman Khapov wrote:
> If I understood you right, you suggest to improve migrate_generate_event to
> accept MigrationState* instead of int* state (which is pointing to field
> MigrationState.state in all usages), and get error reason from
> MigrationState.error, if the new state is MIGRATION_STATE_FAILED, is it?

migrate_generate_event() is a migration internal function, it can directly
reference s->error with error_mutex held.

> 
> That sounds reasonable, thanks!
> 
> But I'm not sure if I got the idea of changing migrate_set_error correctly,
> can you explain in more details, please?

I fat-fingered.. sorry.  I wanted to say migrate_set_state() below, and I
think migrate_set_state() can be kept untouched.

But please consider the other reviewer's comment first: if query-migrate
(or HMP "info migrate") works for you, then this interface is not needed.

> 
> On 2/20/24 10:39, Peter Xu wrote:
> > On Thu, Feb 15, 2024 at 05:27:58PM +0500, Roman Khapov wrote:
> > >       migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
> > > -                      MIGRATION_STATUS_COMPLETED);
> > > +                      MIGRATION_STATUS_COMPLETED, NULL);
> > Instead of enforcing migrate_set_error() to always pass an error, maybe we
> > can allow migrate_generate_event() to fetch s->error in FAILED state, if
> > that hint ever existed?
> > 
> -- 
> Best regards,
> Roman Khapov
> 

-- 
Peter Xu
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Markus Armbruster 9 months, 2 weeks ago
Roman Khapov <rkhapov@yandex-team.ru> writes:

> This commit adds the optional field reason for the events, which
> contains the string, describing reason of status changing.
> For example: reason of migration fail.
>
> Function migrate_set_state now accepts 4th argument: the reason to
> pass to event. Every call of this function appended with NULL argument.
>
> Also migrate_set_state_err_reason was added to form reason from Error*
>
> Signed-off-by: Roman Khapov <rkhapov@yandex-team.ru>

[...]

> diff --git a/qapi/migration.json b/qapi/migration.json
> index 5a565d9b8d..33bb5b7f50 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -1392,6 +1392,7 @@
>  # Emitted when a migration event happens
>  #
>  # @status: @MigrationStatus describing the current migration status.
> +# @reason: Optional description of status changing reason.

Intended use?

When is it present?

>  #
>  # Since: 2.4
>  #
> @@ -1402,7 +1403,7 @@
>  #     "data": {"status": "completed"} }
>  ##
>  { 'event': 'MIGRATION',
> -  'data': {'status': 'MigrationStatus'}}
> +  'data': {'status': 'MigrationStatus', '*reason': 'str'}}
>  
>  ##
>  # @MIGRATION_PASS:
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Roman Khapov 9 months, 1 week ago
To be clear: you meant that the description of the event must be 
extended, similar to its description on the commit message? Or you don't 
see its proper usage at all?

If the first is true, then I agree, the description can be improved, and 
it will be useful, thanks! Will add it in the next version of the patch 
soon.

On 2/16/24 11:17, Markus Armbruster wrote:
> Roman Khapov <rkhapov@yandex-team.ru> writes:
>
>> This commit adds the optional field reason for the events, which
>> contains the string, describing reason of status changing.
>> For example: reason of migration fail.
>>
>> Function migrate_set_state now accepts 4th argument: the reason to
>> pass to event. Every call of this function appended with NULL argument.
>>
>> Also migrate_set_state_err_reason was added to form reason from Error*
>>
>> Signed-off-by: Roman Khapov <rkhapov@yandex-team.ru>
> [...]
>
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index 5a565d9b8d..33bb5b7f50 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -1392,6 +1392,7 @@
>>   # Emitted when a migration event happens
>>   #
>>   # @status: @MigrationStatus describing the current migration status.
>> +# @reason: Optional description of status changing reason.
> Intended use?
>
> When is it present?
>
>>   #
>>   # Since: 2.4
>>   #
>> @@ -1402,7 +1403,7 @@
>>   #     "data": {"status": "completed"} }
>>   ##
>>   { 'event': 'MIGRATION',
>> -  'data': {'status': 'MigrationStatus'}}
>> +  'data': {'status': 'MigrationStatus', '*reason': 'str'}}
>>   
>>   ##
>>   # @MIGRATION_PASS:

-- 
Best regards,
Roman
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Markus Armbruster 9 months, 1 week ago
Roman Khapov <rkhapov@yandex-team.ru> writes:

> To be clear: you meant that the description of the event must be extended, similar to its description on the commit message? Or you don't see its proper usage at all?

The commit message doesn't really tell me either why and how anybody
would use @reason.  Once we have a common understanding there, improving
the doc comment should be straightforward.

> If the first is true, then I agree, the description can be improved, and it will be useful, thanks! Will add it in the next version of the patch soon.
>
> On 2/16/24 11:17, Markus Armbruster wrote:
>> Roman Khapov <rkhapov@yandex-team.ru> writes:
>>
>>> This commit adds the optional field reason for the events, which
>>> contains the string, describing reason of status changing.
>>> For example: reason of migration fail.
>>>
>>> Function migrate_set_state now accepts 4th argument: the reason to
>>> pass to event. Every call of this function appended with NULL argument.
>>>
>>> Also migrate_set_state_err_reason was added to form reason from Error*
>>>
>>> Signed-off-by: Roman Khapov <rkhapov@yandex-team.ru>
>>
>> [...]
>>
>>> diff --git a/qapi/migration.json b/qapi/migration.json
>>> index 5a565d9b8d..33bb5b7f50 100644
>>> --- a/qapi/migration.json
>>> +++ b/qapi/migration.json
>>> @@ -1392,6 +1392,7 @@
>>>  # Emitted when a migration event happens
>>>  #
>>>  # @status: @MigrationStatus describing the current migration status.
>>> +# @reason: Optional description of status changing reason.
>>
>> Intended use?
>>
>> When is it present?
>>
>>>  #
>>>  # Since: 2.4
>>>  #
>>> @@ -1402,7 +1403,7 @@
>>>  #     "data": {"status": "completed"} }
>>>  ##
>>>  { 'event': 'MIGRATION',
>>> -  'data': {'status': 'MigrationStatus'}}
>>> +  'data': {'status': 'MigrationStatus', '*reason': 'str'}}
>>>  ##
>>>  # @MIGRATION_PASS:
Re: [PATCH v2 1/2] qapi/migration.json: add reason to MIGRATION event
Posted by Roman Khapov 9 months, 1 week ago
The reason in MIGRATION event can be used, when there is some helpful 
message, that can be added to improve debugging\understanding of the 
reason of migration status changing.

I propose the next usage - when qemu sends (MIGRATION status=failed) 
event, the error message describing the problem can be sent in that 
event too. This is more comfortable way to understand the problem 
comparing to reading qemu logs, especially in some cases, where qemu are 
launched and monitored through various automation processes (examples: 
cloud environments, some 'integration' tests of the qemu, etc). 
Probably, there are some other cases when the accompanying message can 
be useful, not only in failed migrations, but now I don't know that cases.

If we have common understanding with that, I will improve the doc 
comment and the commit message of the first path too in the next version 
of the series.

On 2/19/24 11:35, Markus Armbruster wrote:
> Roman Khapov <rkhapov@yandex-team.ru> writes:
>
>> To be clear: you meant that the description of the event must be extended, similar to its description on the commit message? Or you don't see its proper usage at all?
> The commit message doesn't really tell me either why and how anybody
> would use @reason.  Once we have a common understanding there, improving
> the doc comment should be straightforward.

-- 
Best regards,
Roman Khapov