[v9] migration: propagate vTPM errors using Error objects

[PATCH v9 20/27] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Arun Menon 6 months, 1 week ago

This is an incremental step in converting vmstate loading
code to report error via Error objects instead of directly
printing it to console/monitor.
It is ensured that loadvm_process_enable_colo() must report an error
in errp, in case of failure.

Signed-off-by: Arun Menon <armenon@redhat.com>
---
 include/migration/colo.h |  2 +-
 migration/migration.c    | 12 ++++++------
 migration/ram.c          |  8 ++++----
 migration/ram.h          |  2 +-
 migration/savevm.c       | 26 ++++++++++++++------------
 5 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
 bool migration_in_colo_state(void);
 
 /* loadvm */
-int migration_incoming_enable_colo(void);
+int migration_incoming_enable_colo(Error **errp);
 void migration_incoming_disable_colo(void);
 bool migration_incoming_colo_enabled(void);
 bool migration_incoming_in_colo_state(void);
diff --git a/migration/migration.c b/migration/migration.c
index 6962dc7d7f3e0121d28994c98f12f9f2258343d7..4a76d7ed730589bae87115368b0bf4819f8b161e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
     migration_colo_enabled = false;
 }
 
-int migration_incoming_enable_colo(void)
+int migration_incoming_enable_colo(Error **errp)
 {
 #ifndef CONFIG_REPLICATION
-    error_report("ENABLE_COLO command come in migration stream, but the "
-                 "replication module is not built in");
+    error_setg(errp, "ENABLE_COLO command come in migration stream, but the "
+               "replication module is not built in");
     return -ENOTSUP;
 #endif
 
     if (!migrate_colo()) {
-        error_report("ENABLE_COLO command come in migration stream, but x-colo "
-                     "capability is not set");
+        error_setg(errp, "ENABLE_COLO command come in migration stream"
+                   ", but x-colo capability is not set");
         return -EINVAL;
     }
 
     if (ram_block_discard_disable(true)) {
-        error_report("COLO: cannot disable RAM discard");
+        error_setg(errp, "COLO: cannot disable RAM discard");
         return -EBUSY;
     }
     migration_colo_enabled = true;
diff --git a/migration/ram.c b/migration/ram.c
index 6a0dcc04f436524a37672c41c38f201f06773374..995431c9e320f443c385c29d664d62e18c1afd90 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
  * memory of the secondary VM, it is need to hold the global lock
  * to call this helper.
  */
-int colo_init_ram_cache(void)
+int colo_init_ram_cache(Error **errp)
 {
     RAMBlock *block;
 
@@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
             block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                     NULL, false, false);
             if (!block->colo_cache) {
-                error_report("%s: Can't alloc memory for COLO cache of block %s,"
-                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
-                             block->used_length);
+                error_setg(errp, "Can't alloc memory for COLO cache of "
+                           "block %s, size 0x" RAM_ADDR_FMT,
+                           block->idstr, block->used_length);
                 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                     if (block->colo_cache) {
                         qemu_anon_ram_free(block->colo_cache, block->used_length);
diff --git a/migration/ram.h b/migration/ram.h
index 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
                                    bool set);
 
 /* ram cache */
-int colo_init_ram_cache(void);
+int colo_init_ram_cache(Error **errp);
 void colo_flush_ram_cache(void);
 void colo_release_ram_cache(void);
 void colo_incoming_start_dirty_log(void);
diff --git a/migration/savevm.c b/migration/savevm.c
index 938adb20270adbf9546b0884d0877c25c3f0f816..a6b71a958aeda31e89043f8103bfe2fc89542fb5 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2519,15 +2519,21 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
     return 0;
 }
 
-static int loadvm_process_enable_colo(MigrationIncomingState *mis)
+static int loadvm_process_enable_colo(MigrationIncomingState *mis,
+                                      Error **errp)
 {
-    int ret = migration_incoming_enable_colo();
+    ERRP_GUARD();
+    int ret;
 
-    if (!ret) {
-        ret = colo_init_ram_cache();
-        if (ret) {
-            migration_incoming_disable_colo();
-        }
+    ret = migration_incoming_enable_colo(errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = colo_init_ram_cache(errp);
+    if (ret) {
+        error_prepend(errp, "failed to init colo RAM cache: %d: ", ret);
+        migration_incoming_disable_colo();
     }
     return ret;
 }
@@ -2646,11 +2652,7 @@ static int loadvm_process_command(QEMUFile *f, Error **errp)
         return loadvm_handle_recv_bitmap(mis, len, errp);
 
     case MIG_CMD_ENABLE_COLO:
-        ret = loadvm_process_enable_colo(mis);
-        if (ret < 0) {
-            error_setg(errp, "Failed to load device state command: %d", ret);
-        }
-        return ret;
+        return loadvm_process_enable_colo(mis, errp);
 
     case MIG_CMD_SWITCHOVER_START:
         ret = loadvm_postcopy_handle_switchover_start();

-- 
2.50.1

Re: [PATCH v9 20/27] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Marc-André Lureau 6 months, 1 week ago

Hi

On Tue, Aug 5, 2025 at 10:31 PM Arun Menon <armenon@redhat.com> wrote:

> This is an incremental step in converting vmstate loading
> code to report error via Error objects instead of directly
> printing it to console/monitor.
> It is ensured that loadvm_process_enable_colo() must report an error
> in errp, in case of failure.
>
> Signed-off-by: Arun Menon <armenon@redhat.com>
>

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>


> ---
>  include/migration/colo.h |  2 +-
>  migration/migration.c    | 12 ++++++------
>  migration/ram.c          |  8 ++++----
>  migration/ram.h          |  2 +-
>  migration/savevm.c       | 26 ++++++++++++++------------
>  5 files changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/include/migration/colo.h b/include/migration/colo.h
> index
> 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e
> 100644
> --- a/include/migration/colo.h
> +++ b/include/migration/colo.h
> @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
>  bool migration_in_colo_state(void);
>
>  /* loadvm */
> -int migration_incoming_enable_colo(void);
> +int migration_incoming_enable_colo(Error **errp);
>  void migration_incoming_disable_colo(void);
>  bool migration_incoming_colo_enabled(void);
>  bool migration_incoming_in_colo_state(void);
> diff --git a/migration/migration.c b/migration/migration.c
> index
> 6962dc7d7f3e0121d28994c98f12f9f2258343d7..4a76d7ed730589bae87115368b0bf4819f8b161e
> 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
>      migration_colo_enabled = false;
>  }
>
> -int migration_incoming_enable_colo(void)
> +int migration_incoming_enable_colo(Error **errp)
>  {
>  #ifndef CONFIG_REPLICATION
> -    error_report("ENABLE_COLO command come in migration stream, but the "
> -                 "replication module is not built in");
> +    error_setg(errp, "ENABLE_COLO command come in migration stream, but
> the "
> +               "replication module is not built in");
>      return -ENOTSUP;
>  #endif
>
>      if (!migrate_colo()) {
> -        error_report("ENABLE_COLO command come in migration stream, but
> x-colo "
> -                     "capability is not set");
> +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> +                   ", but x-colo capability is not set");
>          return -EINVAL;
>      }
>
>      if (ram_block_discard_disable(true)) {
> -        error_report("COLO: cannot disable RAM discard");
> +        error_setg(errp, "COLO: cannot disable RAM discard");
>          return -EBUSY;
>      }
>      migration_colo_enabled = true;
> diff --git a/migration/ram.c b/migration/ram.c
> index
> 6a0dcc04f436524a37672c41c38f201f06773374..995431c9e320f443c385c29d664d62e18c1afd90
> 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
>   * memory of the secondary VM, it is need to hold the global lock
>   * to call this helper.
>   */
> -int colo_init_ram_cache(void)
> +int colo_init_ram_cache(Error **errp)
>  {
>      RAMBlock *block;
>
> @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
>              block->colo_cache = qemu_anon_ram_alloc(block->used_length,
>                                                      NULL, false, false);
>              if (!block->colo_cache) {
> -                error_report("%s: Can't alloc memory for COLO cache of
> block %s,"
> -                             "size 0x" RAM_ADDR_FMT, __func__,
> block->idstr,
> -                             block->used_length);
> +                error_setg(errp, "Can't alloc memory for COLO cache of "
> +                           "block %s, size 0x" RAM_ADDR_FMT,
> +                           block->idstr, block->used_length);
>                  RAMBLOCK_FOREACH_NOT_IGNORED(block) {
>                      if (block->colo_cache) {
>                          qemu_anon_ram_free(block->colo_cache,
> block->used_length);
> diff --git a/migration/ram.h b/migration/ram.h
> index
> 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627
> 100644
> --- a/migration/ram.h
> +++ b/migration/ram.h
> @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block,
> ram_addr_t offset,
>                                     bool set);
>
>  /* ram cache */
> -int colo_init_ram_cache(void);
> +int colo_init_ram_cache(Error **errp);
>  void colo_flush_ram_cache(void);
>  void colo_release_ram_cache(void);
>  void colo_incoming_start_dirty_log(void);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index
> 938adb20270adbf9546b0884d0877c25c3f0f816..a6b71a958aeda31e89043f8103bfe2fc89542fb5
> 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2519,15 +2519,21 @@ static int
> loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
>      return 0;
>  }
>
> -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> +                                      Error **errp)
>  {
> -    int ret = migration_incoming_enable_colo();
> +    ERRP_GUARD();
> +    int ret;
>
> -    if (!ret) {
> -        ret = colo_init_ram_cache();
> -        if (ret) {
> -            migration_incoming_disable_colo();
> -        }
> +    ret = migration_incoming_enable_colo(errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = colo_init_ram_cache(errp);
> +    if (ret) {
>

Note: here you keep the "ret" error condition !=0, ok.

colo_init_ram_cache(), returns -errno on error. Although errno should
remain unchanged on success (during qemu_anon_ram_free etc), I think it
would be safer to convert the function to follow the recommended
bool-valued function for true on success / false on failure instead.


> +        error_prepend(errp, "failed to init colo RAM cache: %d: ", ret);
> +        migration_incoming_disable_colo();
>
     }
>      return ret;
>  }
> @@ -2646,11 +2652,7 @@ static int loadvm_process_command(QEMUFile *f,
> Error **errp)
>          return loadvm_handle_recv_bitmap(mis, len, errp);
>
>      case MIG_CMD_ENABLE_COLO:
> -        ret = loadvm_process_enable_colo(mis);
> -        if (ret < 0) {
> -            error_setg(errp, "Failed to load device state command: %d",
> ret);
> -        }
> -        return ret;
> +        return loadvm_process_enable_colo(mis, errp);
>
>      case MIG_CMD_SWITCHOVER_START:
>          ret = loadvm_postcopy_handle_switchover_start();
>
> --
> 2.50.1
>
>

Re: [PATCH v9 20/27] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Arun Menon 6 months, 1 week ago

Hi Marc-André,
Thanks for the review and the Reviewed-by tag.

On Wed, Aug 06, 2025 at 12:07:25PM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Tue, Aug 5, 2025 at 10:31 PM Arun Menon <armenon@redhat.com> wrote:
> 
> > This is an incremental step in converting vmstate loading
> > code to report error via Error objects instead of directly
> > printing it to console/monitor.
> > It is ensured that loadvm_process_enable_colo() must report an error
> > in errp, in case of failure.
> >
> > Signed-off-by: Arun Menon <armenon@redhat.com>
> >
> 
> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> 
> > ---
> >  include/migration/colo.h |  2 +-
> >  migration/migration.c    | 12 ++++++------
> >  migration/ram.c          |  8 ++++----
> >  migration/ram.h          |  2 +-
> >  migration/savevm.c       | 26 ++++++++++++++------------
> >  5 files changed, 26 insertions(+), 24 deletions(-)
> >
> > diff --git a/include/migration/colo.h b/include/migration/colo.h
> > index
> > 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e
> > 100644
> > --- a/include/migration/colo.h
> > +++ b/include/migration/colo.h
> > @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
> >  bool migration_in_colo_state(void);
> >
> >  /* loadvm */
> > -int migration_incoming_enable_colo(void);
> > +int migration_incoming_enable_colo(Error **errp);
> >  void migration_incoming_disable_colo(void);
> >  bool migration_incoming_colo_enabled(void);
> >  bool migration_incoming_in_colo_state(void);
> > diff --git a/migration/migration.c b/migration/migration.c
> > index
> > 6962dc7d7f3e0121d28994c98f12f9f2258343d7..4a76d7ed730589bae87115368b0bf4819f8b161e
> > 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
> >      migration_colo_enabled = false;
> >  }
> >
> > -int migration_incoming_enable_colo(void)
> > +int migration_incoming_enable_colo(Error **errp)
> >  {
> >  #ifndef CONFIG_REPLICATION
> > -    error_report("ENABLE_COLO command come in migration stream, but the "
> > -                 "replication module is not built in");
> > +    error_setg(errp, "ENABLE_COLO command come in migration stream, but
> > the "
> > +               "replication module is not built in");
> >      return -ENOTSUP;
> >  #endif
> >
> >      if (!migrate_colo()) {
> > -        error_report("ENABLE_COLO command come in migration stream, but
> > x-colo "
> > -                     "capability is not set");
> > +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> > +                   ", but x-colo capability is not set");
> >          return -EINVAL;
> >      }
> >
> >      if (ram_block_discard_disable(true)) {
> > -        error_report("COLO: cannot disable RAM discard");
> > +        error_setg(errp, "COLO: cannot disable RAM discard");
> >          return -EBUSY;
> >      }
> >      migration_colo_enabled = true;
> > diff --git a/migration/ram.c b/migration/ram.c
> > index
> > 6a0dcc04f436524a37672c41c38f201f06773374..995431c9e320f443c385c29d664d62e18c1afd90
> > 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
> >   * memory of the secondary VM, it is need to hold the global lock
> >   * to call this helper.
> >   */
> > -int colo_init_ram_cache(void)
> > +int colo_init_ram_cache(Error **errp)
> >  {
> >      RAMBlock *block;
> >
> > @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
> >              block->colo_cache = qemu_anon_ram_alloc(block->used_length,
> >                                                      NULL, false, false);
> >              if (!block->colo_cache) {
> > -                error_report("%s: Can't alloc memory for COLO cache of
> > block %s,"
> > -                             "size 0x" RAM_ADDR_FMT, __func__,
> > block->idstr,
> > -                             block->used_length);
> > +                error_setg(errp, "Can't alloc memory for COLO cache of "
> > +                           "block %s, size 0x" RAM_ADDR_FMT,
> > +                           block->idstr, block->used_length);
> >                  RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> >                      if (block->colo_cache) {
> >                          qemu_anon_ram_free(block->colo_cache,
> > block->used_length);
> > diff --git a/migration/ram.h b/migration/ram.h
> > index
> > 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627
> > 100644
> > --- a/migration/ram.h
> > +++ b/migration/ram.h
> > @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block,
> > ram_addr_t offset,
> >                                     bool set);
> >
> >  /* ram cache */
> > -int colo_init_ram_cache(void);
> > +int colo_init_ram_cache(Error **errp);
> >  void colo_flush_ram_cache(void);
> >  void colo_release_ram_cache(void);
> >  void colo_incoming_start_dirty_log(void);
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index
> > 938adb20270adbf9546b0884d0877c25c3f0f816..a6b71a958aeda31e89043f8103bfe2fc89542fb5
> > 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -2519,15 +2519,21 @@ static int
> > loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
> >      return 0;
> >  }
> >
> > -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> > +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> > +                                      Error **errp)
> >  {
> > -    int ret = migration_incoming_enable_colo();
> > +    ERRP_GUARD();
> > +    int ret;
> >
> > -    if (!ret) {
> > -        ret = colo_init_ram_cache();
> > -        if (ret) {
> > -            migration_incoming_disable_colo();
> > -        }
> > +    ret = migration_incoming_enable_colo(errp);
> > +    if (ret < 0) {
> > +        return ret;
> > +    }
> > +
> > +    ret = colo_init_ram_cache(errp);
> > +    if (ret) {
> >
> 
> Note: here you keep the "ret" error condition !=0, ok.
> 
> colo_init_ram_cache(), returns -errno on error. Although errno should
> remain unchanged on success (during qemu_anon_ram_free etc), I think it
> would be safer to convert the function to follow the recommended
> bool-valued function for true on success / false on failure instead.

There has been some discussion on this here:
https://lore.kernel.org/all/aH5AtUcjI3HYXdBe@redhat.com/
It has been advised not to use bool return.

> 
> 
> > +        error_prepend(errp, "failed to init colo RAM cache: %d: ", ret);
> > +        migration_incoming_disable_colo();
> >
>      }
> >      return ret;
> >  }
> > @@ -2646,11 +2652,7 @@ static int loadvm_process_command(QEMUFile *f,
> > Error **errp)
> >          return loadvm_handle_recv_bitmap(mis, len, errp);
> >
> >      case MIG_CMD_ENABLE_COLO:
> > -        ret = loadvm_process_enable_colo(mis);
> > -        if (ret < 0) {
> > -            error_setg(errp, "Failed to load device state command: %d",
> > ret);
> > -        }
> > -        return ret;
> > +        return loadvm_process_enable_colo(mis, errp);
> >
> >      case MIG_CMD_SWITCHOVER_START:
> >          ret = loadvm_postcopy_handle_switchover_start();
> >
> > --
> > 2.50.1
> >
> >
Regards,
Arun

Re: [PATCH v9 20/27] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Marc-André Lureau 6 months, 1 week ago

Hi

On Wed, Aug 6, 2025 at 1:58 PM Arun Menon <armenon@redhat.com> wrote:
>
> Hi Marc-André,
> Thanks for the review and the Reviewed-by tag.
>
> On Wed, Aug 06, 2025 at 12:07:25PM +0400, Marc-André Lureau wrote:
> > Hi
> >
> > On Tue, Aug 5, 2025 at 10:31 PM Arun Menon <armenon@redhat.com> wrote:
> >
> > > This is an incremental step in converting vmstate loading
> > > code to report error via Error objects instead of directly
> > > printing it to console/monitor.
> > > It is ensured that loadvm_process_enable_colo() must report an error
> > > in errp, in case of failure.
> > >
> > > Signed-off-by: Arun Menon <armenon@redhat.com>
> > >
> >
> > Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> >
> >
> > > ---
> > >  include/migration/colo.h |  2 +-
> > >  migration/migration.c    | 12 ++++++------
> > >  migration/ram.c          |  8 ++++----
> > >  migration/ram.h          |  2 +-
> > >  migration/savevm.c       | 26 ++++++++++++++------------
> > >  5 files changed, 26 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/include/migration/colo.h b/include/migration/colo.h
> > > index
> > > 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e
> > > 100644
> > > --- a/include/migration/colo.h
> > > +++ b/include/migration/colo.h
> > > @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
> > >  bool migration_in_colo_state(void);
> > >
> > >  /* loadvm */
> > > -int migration_incoming_enable_colo(void);
> > > +int migration_incoming_enable_colo(Error **errp);
> > >  void migration_incoming_disable_colo(void);
> > >  bool migration_incoming_colo_enabled(void);
> > >  bool migration_incoming_in_colo_state(void);
> > > diff --git a/migration/migration.c b/migration/migration.c
> > > index
> > > 6962dc7d7f3e0121d28994c98f12f9f2258343d7..4a76d7ed730589bae87115368b0bf4819f8b161e
> > > 100644
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
> > >      migration_colo_enabled = false;
> > >  }
> > >
> > > -int migration_incoming_enable_colo(void)
> > > +int migration_incoming_enable_colo(Error **errp)
> > >  {
> > >  #ifndef CONFIG_REPLICATION
> > > -    error_report("ENABLE_COLO command come in migration stream, but the "
> > > -                 "replication module is not built in");
> > > +    error_setg(errp, "ENABLE_COLO command come in migration stream, but
> > > the "
> > > +               "replication module is not built in");
> > >      return -ENOTSUP;
> > >  #endif
> > >
> > >      if (!migrate_colo()) {
> > > -        error_report("ENABLE_COLO command come in migration stream, but
> > > x-colo "
> > > -                     "capability is not set");
> > > +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> > > +                   ", but x-colo capability is not set");
> > >          return -EINVAL;
> > >      }
> > >
> > >      if (ram_block_discard_disable(true)) {
> > > -        error_report("COLO: cannot disable RAM discard");
> > > +        error_setg(errp, "COLO: cannot disable RAM discard");
> > >          return -EBUSY;
> > >      }
> > >      migration_colo_enabled = true;
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index
> > > 6a0dcc04f436524a37672c41c38f201f06773374..995431c9e320f443c385c29d664d62e18c1afd90
> > > 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
> > >   * memory of the secondary VM, it is need to hold the global lock
> > >   * to call this helper.
> > >   */
> > > -int colo_init_ram_cache(void)
> > > +int colo_init_ram_cache(Error **errp)
> > >  {
> > >      RAMBlock *block;
> > >
> > > @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
> > >              block->colo_cache = qemu_anon_ram_alloc(block->used_length,
> > >                                                      NULL, false, false);
> > >              if (!block->colo_cache) {
> > > -                error_report("%s: Can't alloc memory for COLO cache of
> > > block %s,"
> > > -                             "size 0x" RAM_ADDR_FMT, __func__,
> > > block->idstr,
> > > -                             block->used_length);
> > > +                error_setg(errp, "Can't alloc memory for COLO cache of "
> > > +                           "block %s, size 0x" RAM_ADDR_FMT,
> > > +                           block->idstr, block->used_length);
> > >                  RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > >                      if (block->colo_cache) {
> > >                          qemu_anon_ram_free(block->colo_cache,
> > > block->used_length);
> > > diff --git a/migration/ram.h b/migration/ram.h
> > > index
> > > 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627
> > > 100644
> > > --- a/migration/ram.h
> > > +++ b/migration/ram.h
> > > @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block,
> > > ram_addr_t offset,
> > >                                     bool set);
> > >
> > >  /* ram cache */
> > > -int colo_init_ram_cache(void);
> > > +int colo_init_ram_cache(Error **errp);
> > >  void colo_flush_ram_cache(void);
> > >  void colo_release_ram_cache(void);
> > >  void colo_incoming_start_dirty_log(void);
> > > diff --git a/migration/savevm.c b/migration/savevm.c
> > > index
> > > 938adb20270adbf9546b0884d0877c25c3f0f816..a6b71a958aeda31e89043f8103bfe2fc89542fb5
> > > 100644
> > > --- a/migration/savevm.c
> > > +++ b/migration/savevm.c
> > > @@ -2519,15 +2519,21 @@ static int
> > > loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
> > >      return 0;
> > >  }
> > >
> > > -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> > > +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> > > +                                      Error **errp)
> > >  {
> > > -    int ret = migration_incoming_enable_colo();
> > > +    ERRP_GUARD();
> > > +    int ret;
> > >
> > > -    if (!ret) {
> > > -        ret = colo_init_ram_cache();
> > > -        if (ret) {
> > > -            migration_incoming_disable_colo();
> > > -        }
> > > +    ret = migration_incoming_enable_colo(errp);
> > > +    if (ret < 0) {
> > > +        return ret;
> > > +    }
> > > +
> > > +    ret = colo_init_ram_cache(errp);
> > > +    if (ret) {
> > >
> >
> > Note: here you keep the "ret" error condition !=0, ok.
> >
> > colo_init_ram_cache(), returns -errno on error. Although errno should
> > remain unchanged on success (during qemu_anon_ram_free etc), I think it
> > would be safer to convert the function to follow the recommended
> > bool-valued function for true on success / false on failure instead.
>
> There has been some discussion on this here:
> https://lore.kernel.org/all/aH5AtUcjI3HYXdBe@redhat.com/
> It has been advised not to use bool return.

Ok, but the usage of errno in that function seems fragile as other
functions could clear it before it is returned. Can you make another
patch to change the return value to -1 and document it?

thanks

>
> >
> >
> > > +        error_prepend(errp, "failed to init colo RAM cache: %d: ", ret);
> > > +        migration_incoming_disable_colo();
> > >
> >      }
> > >      return ret;
> > >  }
> > > @@ -2646,11 +2652,7 @@ static int loadvm_process_command(QEMUFile *f,
> > > Error **errp)
> > >          return loadvm_handle_recv_bitmap(mis, len, errp);
> > >
> > >      case MIG_CMD_ENABLE_COLO:
> > > -        ret = loadvm_process_enable_colo(mis);
> > > -        if (ret < 0) {
> > > -            error_setg(errp, "Failed to load device state command: %d",
> > > ret);
> > > -        }
> > > -        return ret;
> > > +        return loadvm_process_enable_colo(mis, errp);
> > >
> > >      case MIG_CMD_SWITCHOVER_START:
> > >          ret = loadvm_postcopy_handle_switchover_start();
> > >
> > > --
> > > 2.50.1
> > >
> > >
> Regards,
> Arun
>
>


-- 
Marc-André Lureau

Re: [PATCH v9 20/27] migration: push Error **errp into loadvm_process_enable_colo()

Posted by Arun Menon 6 months, 1 week ago

Hi Marc-André,

On Wed, Aug 06, 2025 at 02:04:25PM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Aug 6, 2025 at 1:58 PM Arun Menon <armenon@redhat.com> wrote:
> >
> > Hi Marc-André,
> > Thanks for the review and the Reviewed-by tag.
> >
> > On Wed, Aug 06, 2025 at 12:07:25PM +0400, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Tue, Aug 5, 2025 at 10:31 PM Arun Menon <armenon@redhat.com> wrote:
> > >
> > > > This is an incremental step in converting vmstate loading
> > > > code to report error via Error objects instead of directly
> > > > printing it to console/monitor.
> > > > It is ensured that loadvm_process_enable_colo() must report an error
> > > > in errp, in case of failure.
> > > >
> > > > Signed-off-by: Arun Menon <armenon@redhat.com>
> > > >
> > >
> > > Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > >
> > >
> > > > ---
> > > >  include/migration/colo.h |  2 +-
> > > >  migration/migration.c    | 12 ++++++------
> > > >  migration/ram.c          |  8 ++++----
> > > >  migration/ram.h          |  2 +-
> > > >  migration/savevm.c       | 26 ++++++++++++++------------
> > > >  5 files changed, 26 insertions(+), 24 deletions(-)
> > > >
> > > > diff --git a/include/migration/colo.h b/include/migration/colo.h
> > > > index
> > > > 43222ef5ae6adc3f7d8aa6a48bef79af33d09208..d4fe422e4d335d3bef4f860f56400fcd73287a0e
> > > > 100644
> > > > --- a/include/migration/colo.h
> > > > +++ b/include/migration/colo.h
> > > > @@ -25,7 +25,7 @@ void migrate_start_colo_process(MigrationState *s);
> > > >  bool migration_in_colo_state(void);
> > > >
> > > >  /* loadvm */
> > > > -int migration_incoming_enable_colo(void);
> > > > +int migration_incoming_enable_colo(Error **errp);
> > > >  void migration_incoming_disable_colo(void);
> > > >  bool migration_incoming_colo_enabled(void);
> > > >  bool migration_incoming_in_colo_state(void);
> > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > index
> > > > 6962dc7d7f3e0121d28994c98f12f9f2258343d7..4a76d7ed730589bae87115368b0bf4819f8b161e
> > > > 100644
> > > > --- a/migration/migration.c
> > > > +++ b/migration/migration.c
> > > > @@ -623,22 +623,22 @@ void migration_incoming_disable_colo(void)
> > > >      migration_colo_enabled = false;
> > > >  }
> > > >
> > > > -int migration_incoming_enable_colo(void)
> > > > +int migration_incoming_enable_colo(Error **errp)
> > > >  {
> > > >  #ifndef CONFIG_REPLICATION
> > > > -    error_report("ENABLE_COLO command come in migration stream, but the "
> > > > -                 "replication module is not built in");
> > > > +    error_setg(errp, "ENABLE_COLO command come in migration stream, but
> > > > the "
> > > > +               "replication module is not built in");
> > > >      return -ENOTSUP;
> > > >  #endif
> > > >
> > > >      if (!migrate_colo()) {
> > > > -        error_report("ENABLE_COLO command come in migration stream, but
> > > > x-colo "
> > > > -                     "capability is not set");
> > > > +        error_setg(errp, "ENABLE_COLO command come in migration stream"
> > > > +                   ", but x-colo capability is not set");
> > > >          return -EINVAL;
> > > >      }
> > > >
> > > >      if (ram_block_discard_disable(true)) {
> > > > -        error_report("COLO: cannot disable RAM discard");
> > > > +        error_setg(errp, "COLO: cannot disable RAM discard");
> > > >          return -EBUSY;
> > > >      }
> > > >      migration_colo_enabled = true;
> > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > index
> > > > 6a0dcc04f436524a37672c41c38f201f06773374..995431c9e320f443c385c29d664d62e18c1afd90
> > > > 100644
> > > > --- a/migration/ram.c
> > > > +++ b/migration/ram.c
> > > > @@ -3576,7 +3576,7 @@ static void colo_init_ram_state(void)
> > > >   * memory of the secondary VM, it is need to hold the global lock
> > > >   * to call this helper.
> > > >   */
> > > > -int colo_init_ram_cache(void)
> > > > +int colo_init_ram_cache(Error **errp)
> > > >  {
> > > >      RAMBlock *block;
> > > >
> > > > @@ -3585,9 +3585,9 @@ int colo_init_ram_cache(void)
> > > >              block->colo_cache = qemu_anon_ram_alloc(block->used_length,
> > > >                                                      NULL, false, false);
> > > >              if (!block->colo_cache) {
> > > > -                error_report("%s: Can't alloc memory for COLO cache of
> > > > block %s,"
> > > > -                             "size 0x" RAM_ADDR_FMT, __func__,
> > > > block->idstr,
> > > > -                             block->used_length);
> > > > +                error_setg(errp, "Can't alloc memory for COLO cache of "
> > > > +                           "block %s, size 0x" RAM_ADDR_FMT,
> > > > +                           block->idstr, block->used_length);
> > > >                  RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> > > >                      if (block->colo_cache) {
> > > >                          qemu_anon_ram_free(block->colo_cache,
> > > > block->used_length);
> > > > diff --git a/migration/ram.h b/migration/ram.h
> > > > index
> > > > 275709a99187f9429ccb4111e05281ec268ba0db..24cd0bf585762cfa1e86834dc03c6baeea2f0627
> > > > 100644
> > > > --- a/migration/ram.h
> > > > +++ b/migration/ram.h
> > > > @@ -109,7 +109,7 @@ void ramblock_set_file_bmap_atomic(RAMBlock *block,
> > > > ram_addr_t offset,
> > > >                                     bool set);
> > > >
> > > >  /* ram cache */
> > > > -int colo_init_ram_cache(void);
> > > > +int colo_init_ram_cache(Error **errp);
> > > >  void colo_flush_ram_cache(void);
> > > >  void colo_release_ram_cache(void);
> > > >  void colo_incoming_start_dirty_log(void);
> > > > diff --git a/migration/savevm.c b/migration/savevm.c
> > > > index
> > > > 938adb20270adbf9546b0884d0877c25c3f0f816..a6b71a958aeda31e89043f8103bfe2fc89542fb5
> > > > 100644
> > > > --- a/migration/savevm.c
> > > > +++ b/migration/savevm.c
> > > > @@ -2519,15 +2519,21 @@ static int
> > > > loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
> > > >      return 0;
> > > >  }
> > > >
> > > > -static int loadvm_process_enable_colo(MigrationIncomingState *mis)
> > > > +static int loadvm_process_enable_colo(MigrationIncomingState *mis,
> > > > +                                      Error **errp)
> > > >  {
> > > > -    int ret = migration_incoming_enable_colo();
> > > > +    ERRP_GUARD();
> > > > +    int ret;
> > > >
> > > > -    if (!ret) {
> > > > -        ret = colo_init_ram_cache();
> > > > -        if (ret) {
> > > > -            migration_incoming_disable_colo();
> > > > -        }
> > > > +    ret = migration_incoming_enable_colo(errp);
> > > > +    if (ret < 0) {
> > > > +        return ret;
> > > > +    }
> > > > +
> > > > +    ret = colo_init_ram_cache(errp);
> > > > +    if (ret) {
> > > >
> > >
> > > Note: here you keep the "ret" error condition !=0, ok.
> > >
> > > colo_init_ram_cache(), returns -errno on error. Although errno should
> > > remain unchanged on success (during qemu_anon_ram_free etc), I think it
> > > would be safer to convert the function to follow the recommended
> > > bool-valued function for true on success / false on failure instead.
> >
> > There has been some discussion on this here:
> > https://lore.kernel.org/all/aH5AtUcjI3HYXdBe@redhat.com/
> > It has been advised not to use bool return.
> 
> Ok, but the usage of errno in that function seems fragile as other
> functions could clear it before it is returned. Can you make another
> patch to change the return value to -1 and document it?
> 
Yes, shall do it. Thanks.

> thanks
> 
> >
> > >
> > >
> > > > +        error_prepend(errp, "failed to init colo RAM cache: %d: ", ret);
> > > > +        migration_incoming_disable_colo();
> > > >
> > >      }
> > > >      return ret;
> > > >  }
> > > > @@ -2646,11 +2652,7 @@ static int loadvm_process_command(QEMUFile *f,
> > > > Error **errp)
> > > >          return loadvm_handle_recv_bitmap(mis, len, errp);
> > > >
> > > >      case MIG_CMD_ENABLE_COLO:
> > > > -        ret = loadvm_process_enable_colo(mis);
> > > > -        if (ret < 0) {
> > > > -            error_setg(errp, "Failed to load device state command: %d",
> > > > ret);
> > > > -        }
> > > > -        return ret;
> > > > +        return loadvm_process_enable_colo(mis, errp);
> > > >
> > > >      case MIG_CMD_SWITCHOVER_START:
> > > >          ret = loadvm_postcopy_handle_switchover_start();
> > > >
> > > > --
> > > > 2.50.1
> > > >
> > > >
> > Regards,
> > Arun
> >
> >
> 
> 
> -- 
> Marc-André Lureau
> 

Regards,
Arun