[v5] migration: propagate vTPM errors using Error objects

[PATCH v5 16/23] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Arun Menon 4 months ago

This is an incremental step in converting vmstate loading
code to report error via Error objects instead of directly
printing it to console/monitor.
It is ensured that loadvm_process_enable_colo() must report an error
in errp, in case of failure.

Signed-off-by: Arun Menon <armenon@redhat.com>
---
 include/migration/colo.h |  2 +-
 migration/migration.c    | 12 ++++++------
 migration/ram.c          |  8 ++++----
 migration/ram.h          |  2 +-
 migration/savevm.c       | 25 ++++++++++++-------------
 5 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
 bool migration_in_colo_state(void);
 
 /* loadvm */
-int migration_incoming_enable_colo(void);
+int migration_incoming_enable_colo(Error **errp);
 void migration_incoming_disable_colo(void);
 bool migration_incoming_colo_enabled(void);
 bool migration_incoming_in_colo_state(void);
diff --git a/migration/migration.c b/migration/migration.c
index 10c216d25dec01f206eacad2edd24d21f00e614c..326487882c8d41e2f89f99f69df0d9d4d42705e4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
     migration_colo_enabled = false;
 }
 
-int migration_incoming_enable_colo(void)
+int migration_incoming_enable_colo(Error **errp)
 {
 #ifndef CONFIG_REPLICATION
-    error_report("ENABLE_COLO command come in migration stream, but the "
-                 "replication module is not built in");
+    error_setg(errp, "ENABLE_COLO command come in migration stream, but the "
+               "replication module is not built in");
     return -ENOTSUP;
 #endif
 
     if (!migrate_colo()) {
-        error_report("ENABLE_COLO command come in migration stream, but x-colo "
-                     "capability is not set");
+        error_setg(errp, "ENABLE_COLO command come in migration stream"
+                   ", but x-colo capability is not set");
         return -EINVAL;
     }
 
     if (ram_block_discard_disable(true)) {
-        error_report("COLO: cannot disable RAM discard");
+        error_setg(errp, "COLO: cannot disable RAM discard");
         return -EBUSY;
     }
     migration_colo_enabled = true;
diff --git a/migration/ram.c b/migration/ram.c
index 8223183132dc0f558f45fbae3f4f832845730bd3..607c979cc15a3d321e5e3e380ac7613d80d86fc9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
  * memory of the secondary VM, it is need to hold the global lock
  * to call this helper.
  */
-int colo_init_ram_cache(void)
+int colo_init_ram_cache(Error **errp)
 {
     RAMBlock *block;
 
@@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
             block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                     NULL, false, false);
             if (!block->colo_cache) {
-                error_report("%s: Can't alloc memory for COLO cache of block %s,"
-                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
-                             block->used_length);
+                error_setg(errp, "%s: Can't alloc memory for COLO cache of "
+                           "block %s, size 0x" RAM_ADDR_FMT, __func__,
+                           block->idstr, block->used_length);
                 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                     if (block->colo_cache) {
                         qemu_anon_ram_free(block->colo_cache, block->used_length);
diff --git a/migration/ram.h b/migration/ram.h
index 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
                                    bool set);
 
 /* ram cache */
-int colo_init_ram_cache(void);
+int colo_init_ram_cache(Error **errp);
 void colo_flush_ram_cache(void);
 void colo_release_ram_cache(void);
 void colo_incoming_start_dirty_log(void);
diff --git a/migration/savevm.c b/migration/savevm.c
index 1cbc44a5314043a403d983511066cf137681a18d..755ba7e4504d377a4649da191ad9875d9fd66f69 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2510,15 +2510,19 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
     return 0;
 }
 
-static int loadvm_process_enable_colo(MigrationIncomingState *mis)
+static int loadvm_process_enable_colo(MigrationIncomingState *mis,
+                                      Error **errp)
 {
-    int ret = migration_incoming_enable_colo();
+    int ret;
 
-    if (!ret) {
-        ret = colo_init_ram_cache();
-        if (ret) {
-            migration_incoming_disable_colo();
-        }
+    if (migration_incoming_enable_colo(errp) < 0) {
+        return -1;
+    }
+
+    ret = colo_init_ram_cache(errp);
+    if (ret) {
+        error_prepend(errp, "failed to init colo RAM cache: %d", ret);
+        migration_incoming_disable_colo();
     }
     return ret;
 }
@@ -2641,12 +2645,7 @@ static int loadvm_process_command(QEMUFile *f, Error **errp)
         return loadvm_handle_recv_bitmap(mis, len, errp);
 
     case MIG_CMD_ENABLE_COLO:
-        ret = loadvm_process_enable_colo(mis);
-        if (ret < 0) {
-            error_setg(errp, "Failed to load device state command: %d", ret);
-            return -1;
-        }
-        return ret;
+        return loadvm_process_enable_colo(mis, errp);
 
     case MIG_CMD_SWITCHOVER_START:
         ret = loadvm_postcopy_handle_switchover_start();

-- 
2.50.0

Re: [PATCH v5 16/23] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Akihiko Odaki 4 months ago

On 2025/07/17 9:37, Arun Menon wrote:
> This is an incremental step in converting vmstate loading
> code to report error via Error objects instead of directly
> printing it to console/monitor.
> It is ensured that loadvm_process_enable_colo() must report an error
> in errp, in case of failure.
> 
> Signed-off-by: Arun Menon <armenon@redhat.com>
> ---
>   include/migration/colo.h |  2 +-
>   migration/migration.c    | 12 ++++++------
>   migration/ram.c          |  8 ++++----
>   migration/ram.h          |  2 +-
>   migration/savevm.c       | 25 ++++++++++++-------------
>   5 files changed, 24 insertions(+), 25 deletions(-)
> 
> diff --git a/include/migration/colo.h b/include/migration/colo.h
> index 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e 100644
> --- a/include/migration/colo.h
> +++ b/include/migration/colo.h
> @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
>   bool migration_in_colo_state(void);
>   
>   /* loadvm */
> -int migration_incoming_enable_colo(void);
> +int migration_incoming_enable_colo(Error **errp);
>   void migration_incoming_disable_colo(void);
>   bool migration_incoming_colo_enabled(void);
>   bool migration_incoming_in_colo_state(void);
> diff --git a/migration/migration.c b/migration/migration.c
> index 10c216d25dec01f206eacad2edd24d21f00e614c..326487882c8d41e2f89f99f69df0d9d4d42705e4 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
>       migration_colo_enabled = false;
>   }
>   
> -int migration_incoming_enable_colo(void)
> +int migration_incoming_enable_colo(Error **errp)
>   {
>   #ifndef CONFIG_REPLICATION
> -    error_report("ENABLE_COLO command come in migration stream, but the "
> -                 "replication module is not built in");
> +    error_setg(errp, "ENABLE_COLO command come in migration stream, but the "
> +               "replication module is not built in");
>       return -ENOTSUP;
>   #endif
>   
>       if (!migrate_colo()) {
> -        error_report("ENABLE_COLO command come in migration stream, but x-colo "
> -                     "capability is not set");
> +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> +                   ", but x-colo capability is not set");
>           return -EINVAL;
>       }
>   
>       if (ram_block_discard_disable(true)) {
> -        error_report("COLO: cannot disable RAM discard");
> +        error_setg(errp, "COLO: cannot disable RAM discard");
>           return -EBUSY;
>       }
>       migration_colo_enabled = true;
> diff --git a/migration/ram.c b/migration/ram.c
> index 8223183132dc0f558f45fbae3f4f832845730bd3..607c979cc15a3d321e5e3e380ac7613d80d86fc9 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
>    * memory of the secondary VM, it is need to hold the global lock
>    * to call this helper.
>    */
> -int colo_init_ram_cache(void)
> +int colo_init_ram_cache(Error **errp)
>   {
>       RAMBlock *block;
>   
> @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
>               block->colo_cache = qemu_anon_ram_alloc(block->used_length,
>                                                       NULL, false, false);
>               if (!block->colo_cache) {
> -                error_report("%s: Can't alloc memory for COLO cache of block %s,"
> -                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
> -                             block->used_length);
> +                error_setg(errp, "%s: Can't alloc memory for COLO cache of "
> +                           "block %s, size 0x" RAM_ADDR_FMT, __func__,
> +                           block->idstr, block->used_length);
>                   RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>                       if (block->colo_cache) {
>                           qemu_anon_ram_free(block->colo_cache, block->used_length);
> diff --git a/migration/ram.h b/migration/ram.h
> index 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627 100644
> --- a/migration/ram.h
> +++ b/migration/ram.h
> @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
>                                      bool set);
>   
>   /* ram cache */
> -int colo_init_ram_cache(void);
> +int colo_init_ram_cache(Error **errp);
>   void colo_flush_ram_cache(void);
>   void colo_release_ram_cache(void);
>   void colo_incoming_start_dirty_log(void);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 1cbc44a5314043a403d983511066cf137681a18d..755ba7e4504d377a4649da191ad9875d9fd66f69 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2510,15 +2510,19 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
>       return 0;
>   }
>   
> -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> +                                      Error **errp)
>   {
> -    int ret = migration_incoming_enable_colo();
> +    int ret;
>   
> -    if (!ret) {
> -        ret = colo_init_ram_cache();
> -        if (ret) {
> -            migration_incoming_disable_colo();
> -        }
> +    if (migration_incoming_enable_colo(errp) < 0) {
> +        return -1;

Returning -1 here while colo_init_ram_cache() returns -errno results in 
an inconsistent semantics of this function's return value.

Ideally, I think this function (and other similar functions) should stop 
returning -errno and instead return a bool value to prevent callers to 
make any assumption based on the return value other than 
success/failure. But that could be done later; let it return -errno 
until then.

Re: [PATCH v5 16/23] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Arun Menon 4 months ago

On Thu, Jul 17, 2025 at 12:26:16PM +0900, Akihiko Odaki wrote:
> On 2025/07/17 9:37, Arun Menon wrote:
> > This is an incremental step in converting vmstate loading
> > code to report error via Error objects instead of directly
> > printing it to console/monitor.
> > It is ensured that loadvm_process_enable_colo() must report an error
> > in errp, in case of failure.
> > 
> > Signed-off-by: Arun Menon <armenon@redhat.com>
> > ---
> >   include/migration/colo.h |  2 +-
> >   migration/migration.c    | 12 ++++++------
> >   migration/ram.c          |  8 ++++----
> >   migration/ram.h          |  2 +-
> >   migration/savevm.c       | 25 ++++++++++++-------------
> >   5 files changed, 24 insertions(+), 25 deletions(-)
> > 
> > diff --git a/include/migration/colo.h b/include/migration/colo.h
> > index 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e 100644
> > --- a/include/migration/colo.h
> > +++ b/include/migration/colo.h
> > @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
> >   bool migration_in_colo_state(void);
> >   /* loadvm */
> > -int migration_incoming_enable_colo(void);
> > +int migration_incoming_enable_colo(Error **errp);
> >   void migration_incoming_disable_colo(void);
> >   bool migration_incoming_colo_enabled(void);
> >   bool migration_incoming_in_colo_state(void);
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 10c216d25dec01f206eacad2edd24d21f00e614c..326487882c8d41e2f89f99f69df0d9d4d42705e4 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
> >       migration_colo_enabled = false;
> >   }
> > -int migration_incoming_enable_colo(void)
> > +int migration_incoming_enable_colo(Error **errp)
> >   {
> >   #ifndef CONFIG_REPLICATION
> > -    error_report("ENABLE_COLO command come in migration stream, but the "
> > -                 "replication module is not built in");
> > +    error_setg(errp, "ENABLE_COLO command come in migration stream, but the "
> > +               "replication module is not built in");
> >       return -ENOTSUP;
> >   #endif
> >       if (!migrate_colo()) {
> > -        error_report("ENABLE_COLO command come in migration stream, but x-colo "
> > -                     "capability is not set");
> > +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> > +                   ", but x-colo capability is not set");
> >           return -EINVAL;
> >       }
> >       if (ram_block_discard_disable(true)) {
> > -        error_report("COLO: cannot disable RAM discard");
> > +        error_setg(errp, "COLO: cannot disable RAM discard");
> >           return -EBUSY;
> >       }
> >       migration_colo_enabled = true;
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 8223183132dc0f558f45fbae3f4f832845730bd3..607c979cc15a3d321e5e3e380ac7613d80d86fc9 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
> >    * memory of the secondary VM, it is need to hold the global lock
> >    * to call this helper.
> >    */
> > -int colo_init_ram_cache(void)
> > +int colo_init_ram_cache(Error **errp)
> >   {
> >       RAMBlock *block;
> > @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
> >               block->colo_cache = qemu_anon_ram_alloc(block->used_length,
> >                                                       NULL, false, false);
> >               if (!block->colo_cache) {
> > -                error_report("%s: Can't alloc memory for COLO cache of block %s,"
> > -                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
> > -                             block->used_length);
> > +                error_setg(errp, "%s: Can't alloc memory for COLO cache of "
> > +                           "block %s, size 0x" RAM_ADDR_FMT, __func__,
> > +                           block->idstr, block->used_length);
> >                   RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >                       if (block->colo_cache) {
> >                           qemu_anon_ram_free(block->colo_cache, block->used_length);
> > diff --git a/migration/ram.h b/migration/ram.h
> > index 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627 100644
> > --- a/migration/ram.h
> > +++ b/migration/ram.h
> > @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
> >                                      bool set);
> >   /* ram cache */
> > -int colo_init_ram_cache(void);
> > +int colo_init_ram_cache(Error **errp);
> >   void colo_flush_ram_cache(void);
> >   void colo_release_ram_cache(void);
> >   void colo_incoming_start_dirty_log(void);
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index 1cbc44a5314043a403d983511066cf137681a18d..755ba7e4504d377a4649da191ad9875d9fd66f69 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -2510,15 +2510,19 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
> >       return 0;
> >   }
> > -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> > +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> > +                                      Error **errp)
> >   {
> > -    int ret = migration_incoming_enable_colo();
> > +    int ret;
> > -    if (!ret) {
> > -        ret = colo_init_ram_cache();
> > -        if (ret) {
> > -            migration_incoming_disable_colo();
> > -        }
> > +    if (migration_incoming_enable_colo(errp) < 0) {
> > +        return -1;
> 
> Returning -1 here while colo_init_ram_cache() returns -errno results in an
> inconsistent semantics of this function's return value.

If I understand correctly, you mean, if migration_incoming_enable_colo() fails, it should 
return the errno instead of -1. Will do.
Thanks

> 
> Ideally, I think this function (and other similar functions) should stop
> returning -errno and instead return a bool value to prevent callers to make
> any assumption based on the return value other than success/failure. But
> that could be done later; let it return -errno until then.
> 
yes