[PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter

Vladimir Sementsov-Ogievskiy posted 12 patches 1 week, 5 days ago
Maintainers: Eric Blake <eblake@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Gerd Hoffmann <kraxel@redhat.com>, Gustavo Romero <gustavo.romero@linaro.org>, Elena Ufimtseva <elena.ufimtseva@oracle.com>, Jagannathan Raman <jag.raman@oracle.com>, John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, "Cédric Le Goater" <clg@redhat.com>, Stefano Garzarella <sgarzare@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Hailiang Zhang <zhanghailiang@xfusion.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Jason Wang <jasowang@redhat.com>, Michael Roth <michael.roth@amd.com>, Kostiantyn Kostiuk <kkostiuk@redhat.com>, Fam Zheng <fam@euphon.net>, Alexander Bulekov <alxndr@bu.edu>, Bandan Das <bsd@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Darren Kenny <darren.kenny@oracle.com>, Qiuhao Li <Qiuhao.Li@outlook.com>, Laurent Vivier <lvivier@redhat.com>, Zhao Liu <zhao1.liu@intel.com>, Stefan Weil <sw@weilnetz.de>, Coiby Xu <Coiby.Xu@gmail.com>
There is a newer version of this series
[PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Vladimir Sementsov-Ogievskiy 1 week, 5 days ago
qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
so let's passthrough the errp.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 migration/colo.c         | 5 ++++-
 migration/migration.c    | 8 +++++---
 migration/postcopy-ram.c | 2 +-
 migration/qemu-file.c    | 4 ++--
 migration/qemu-file.h    | 2 +-
 migration/savevm.c       | 4 ++--
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index e0f713c837..cf4d71d9ed 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -859,7 +859,10 @@ static void *colo_process_incoming_thread(void *opaque)
      * coroutine, and here we are in the COLO incoming thread, so it is ok to
      * set the fd back to blocked.
      */
-    qemu_file_set_blocking(mis->from_src_file, true);
+    if (!qemu_file_set_blocking(mis->from_src_file, true, &local_err)) {
+        error_report_err(local_err);
+        goto out;
+    }
 
     colo_incoming_start_dirty_log();
 
diff --git a/migration/migration.c b/migration/migration.c
index 10c216d25d..e1ac4d73c2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -951,7 +951,7 @@ static void migration_incoming_setup(QEMUFile *f)
 
     assert(!mis->from_src_file);
     mis->from_src_file = f;
-    qemu_file_set_blocking(f, false);
+    qemu_file_set_blocking(f, false, &error_abort);
 }
 
 void migration_incoming_process(void)
@@ -971,7 +971,7 @@ static bool postcopy_try_recover(void)
         /* This should be set already in migration_incoming_setup() */
         assert(mis->from_src_file);
         /* Postcopy has standalone thread to do vm load */
-        qemu_file_set_blocking(mis->from_src_file, true);
+        qemu_file_set_blocking(mis->from_src_file, true, &error_abort);
 
         /* Re-configure the return path */
         mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
@@ -4002,7 +4002,9 @@ void migration_connect(MigrationState *s, Error *error_in)
     }
 
     migration_rate_set(rate_limit);
-    qemu_file_set_blocking(s->to_dst_file, true);
+    if (!qemu_file_set_blocking(s->to_dst_file, true, &local_err)) {
+        goto fail;
+    }
 
     /*
      * Open the return path. For postcopy, it is used exclusively. For
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 45af9a361e..0172172343 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1909,7 +1909,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
      * The new loading channel has its own threads, so it needs to be
      * blocked too.  It's by default true, just be explicit.
      */
-    qemu_file_set_blocking(file, true);
+    qemu_file_set_blocking(file, true, &error_abort);
     mis->postcopy_qemufile_dst = file;
     qemu_sem_post(&mis->postcopy_qemufile_dst_done);
     trace_postcopy_preempt_new_channel();
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index d5c6e7ec61..0f4280df21 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -888,9 +888,9 @@ void qemu_put_counted_string(QEMUFile *f, const char *str)
  *       both directions, and thus changing the blocking on the main
  *       QEMUFile can also affect the return path.
  */
-void qemu_file_set_blocking(QEMUFile *f, bool block)
+bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp)
 {
-    qio_channel_set_blocking(f->ioc, block, NULL);
+    return qio_channel_set_blocking(f->ioc, block, errp);
 }
 
 /*
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index f5b9f430e0..c13c967167 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -71,7 +71,7 @@ void qemu_file_set_error(QEMUFile *f, int ret);
 int qemu_file_shutdown(QEMUFile *f);
 QEMUFile *qemu_file_get_return_path(QEMUFile *f);
 int qemu_fflush(QEMUFile *f);
-void qemu_file_set_blocking(QEMUFile *f, bool block);
+bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp);
 int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
 void qemu_set_offset(QEMUFile *f, off_t off, int whence);
 off_t qemu_get_offset(QEMUFile *f);
diff --git a/migration/savevm.c b/migration/savevm.c
index fabbeb296a..abe0547f9b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2095,7 +2095,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
      * Because we're a thread and not a coroutine we can't yield
      * in qemu_file, and thus we must be blocking now.
      */
-    qemu_file_set_blocking(f, true);
+    qemu_file_set_blocking(f, true, &error_fatal);
 
     /* TODO: sanity check that only postcopiable data will be loaded here */
     load_res = qemu_loadvm_state_main(f, mis);
@@ -2108,7 +2108,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
     f = mis->from_src_file;
 
     /* And non-blocking again so we don't block in any cleanup */
-    qemu_file_set_blocking(f, false);
+    qemu_file_set_blocking(f, false, &error_fatal);
 
     trace_postcopy_ram_listen_thread_exit();
     if (load_res < 0) {
-- 
2.48.1
Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Peter Xu 1 week, 5 days ago
On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
> so let's passthrough the errp.

This looks all reasonable in general.

Said that, using error_abort in migration code normally are not suggested
because it's too strong.

I did check all of below should be on the incoming side which is not as
severe (because killing dest qemu before switchover is normally
benign). Still, can we switch all below users to error_warn (including the
one below that may want to error_report_err(), IMHO a warn report is fine
even for such error)?

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>  migration/colo.c         | 5 ++++-
>  migration/migration.c    | 8 +++++---
>  migration/postcopy-ram.c | 2 +-
>  migration/qemu-file.c    | 4 ++--
>  migration/qemu-file.h    | 2 +-
>  migration/savevm.c       | 4 ++--
>  6 files changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index e0f713c837..cf4d71d9ed 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -859,7 +859,10 @@ static void *colo_process_incoming_thread(void *opaque)
>       * coroutine, and here we are in the COLO incoming thread, so it is ok to
>       * set the fd back to blocked.
>       */
> -    qemu_file_set_blocking(mis->from_src_file, true);
> +    if (!qemu_file_set_blocking(mis->from_src_file, true, &local_err)) {
> +        error_report_err(local_err);
> +        goto out;
> +    }
>  
>      colo_incoming_start_dirty_log();
>  
> diff --git a/migration/migration.c b/migration/migration.c
> index 10c216d25d..e1ac4d73c2 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -951,7 +951,7 @@ static void migration_incoming_setup(QEMUFile *f)
>  
>      assert(!mis->from_src_file);
>      mis->from_src_file = f;
> -    qemu_file_set_blocking(f, false);
> +    qemu_file_set_blocking(f, false, &error_abort);
>  }
>  
>  void migration_incoming_process(void)
> @@ -971,7 +971,7 @@ static bool postcopy_try_recover(void)
>          /* This should be set already in migration_incoming_setup() */
>          assert(mis->from_src_file);
>          /* Postcopy has standalone thread to do vm load */
> -        qemu_file_set_blocking(mis->from_src_file, true);
> +        qemu_file_set_blocking(mis->from_src_file, true, &error_abort);
>  
>          /* Re-configure the return path */
>          mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
> @@ -4002,7 +4002,9 @@ void migration_connect(MigrationState *s, Error *error_in)
>      }
>  
>      migration_rate_set(rate_limit);
> -    qemu_file_set_blocking(s->to_dst_file, true);
> +    if (!qemu_file_set_blocking(s->to_dst_file, true, &local_err)) {
> +        goto fail;
> +    }
>  
>      /*
>       * Open the return path. For postcopy, it is used exclusively. For
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 45af9a361e..0172172343 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1909,7 +1909,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
>       * The new loading channel has its own threads, so it needs to be
>       * blocked too.  It's by default true, just be explicit.
>       */
> -    qemu_file_set_blocking(file, true);
> +    qemu_file_set_blocking(file, true, &error_abort);
>      mis->postcopy_qemufile_dst = file;
>      qemu_sem_post(&mis->postcopy_qemufile_dst_done);
>      trace_postcopy_preempt_new_channel();
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index d5c6e7ec61..0f4280df21 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -888,9 +888,9 @@ void qemu_put_counted_string(QEMUFile *f, const char *str)
>   *       both directions, and thus changing the blocking on the main
>   *       QEMUFile can also affect the return path.
>   */
> -void qemu_file_set_blocking(QEMUFile *f, bool block)
> +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp)
>  {
> -    qio_channel_set_blocking(f->ioc, block, NULL);
> +    return qio_channel_set_blocking(f->ioc, block, errp);
>  }
>  
>  /*
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index f5b9f430e0..c13c967167 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -71,7 +71,7 @@ void qemu_file_set_error(QEMUFile *f, int ret);
>  int qemu_file_shutdown(QEMUFile *f);
>  QEMUFile *qemu_file_get_return_path(QEMUFile *f);
>  int qemu_fflush(QEMUFile *f);
> -void qemu_file_set_blocking(QEMUFile *f, bool block);
> +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp);
>  int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
>  void qemu_set_offset(QEMUFile *f, off_t off, int whence);
>  off_t qemu_get_offset(QEMUFile *f);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index fabbeb296a..abe0547f9b 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2095,7 +2095,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>       * Because we're a thread and not a coroutine we can't yield
>       * in qemu_file, and thus we must be blocking now.
>       */
> -    qemu_file_set_blocking(f, true);
> +    qemu_file_set_blocking(f, true, &error_fatal);
>  
>      /* TODO: sanity check that only postcopiable data will be loaded here */
>      load_res = qemu_loadvm_state_main(f, mis);
> @@ -2108,7 +2108,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>      f = mis->from_src_file;
>  
>      /* And non-blocking again so we don't block in any cleanup */
> -    qemu_file_set_blocking(f, false);
> +    qemu_file_set_blocking(f, false, &error_fatal);
>  
>      trace_postcopy_ram_listen_thread_exit();
>      if (load_res < 0) {
> -- 
> 2.48.1
> 

-- 
Peter Xu
Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Daniel P. Berrangé 1 week, 5 days ago
On Mon, Sep 15, 2025 at 04:18:58PM -0400, Peter Xu wrote:
> On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
> > so let's passthrough the errp.
> 
> This looks all reasonable in general.
> 
> Said that, using error_abort in migration code normally are not suggested
> because it's too strong.

Note, that prior to this series, the existing qemu_socket_set_nonblock
method that migration is calling will assert on failure. This series
removes that assert and propagates it back to the callers to let them
decide what to do. Ideally they would gracefully handle it, but if
they assert that is no worse than current behaviour.

> I did check all of below should be on the incoming side which is not as
> severe (because killing dest qemu before switchover is normally
> benign). Still, can we switch all below users to error_warn (including the
> one below that may want to error_report_err(), IMHO a warn report is fine
> even for such error)?

IMHO ignoring a failure to change the blocking flag status is not
a warnnig, it is unrecoverable for the migration operation. It
should be possible to propagate the error in some way, but it will
potentially require changes across multiple migration methods to
handle this.

> 
> > 
> > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > ---
> >  migration/colo.c         | 5 ++++-
> >  migration/migration.c    | 8 +++++---
> >  migration/postcopy-ram.c | 2 +-
> >  migration/qemu-file.c    | 4 ++--
> >  migration/qemu-file.h    | 2 +-
> >  migration/savevm.c       | 4 ++--
> >  6 files changed, 15 insertions(+), 10 deletions(-)
> > 
> > diff --git a/migration/colo.c b/migration/colo.c
> > index e0f713c837..cf4d71d9ed 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -859,7 +859,10 @@ static void *colo_process_incoming_thread(void *opaque)
> >       * coroutine, and here we are in the COLO incoming thread, so it is ok to
> >       * set the fd back to blocked.
> >       */
> > -    qemu_file_set_blocking(mis->from_src_file, true);
> > +    if (!qemu_file_set_blocking(mis->from_src_file, true, &local_err)) {
> > +        error_report_err(local_err);
> > +        goto out;
> > +    }
> >  
> >      colo_incoming_start_dirty_log();
> >  
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 10c216d25d..e1ac4d73c2 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -951,7 +951,7 @@ static void migration_incoming_setup(QEMUFile *f)
> >  
> >      assert(!mis->from_src_file);
> >      mis->from_src_file = f;
> > -    qemu_file_set_blocking(f, false);
> > +    qemu_file_set_blocking(f, false, &error_abort);
> >  }
> >  
> >  void migration_incoming_process(void)
> > @@ -971,7 +971,7 @@ static bool postcopy_try_recover(void)
> >          /* This should be set already in migration_incoming_setup() */
> >          assert(mis->from_src_file);
> >          /* Postcopy has standalone thread to do vm load */
> > -        qemu_file_set_blocking(mis->from_src_file, true);
> > +        qemu_file_set_blocking(mis->from_src_file, true, &error_abort);
> >  
> >          /* Re-configure the return path */
> >          mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
> > @@ -4002,7 +4002,9 @@ void migration_connect(MigrationState *s, Error *error_in)
> >      }
> >  
> >      migration_rate_set(rate_limit);
> > -    qemu_file_set_blocking(s->to_dst_file, true);
> > +    if (!qemu_file_set_blocking(s->to_dst_file, true, &local_err)) {
> > +        goto fail;
> > +    }
> >  
> >      /*
> >       * Open the return path. For postcopy, it is used exclusively. For
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > index 45af9a361e..0172172343 100644
> > --- a/migration/postcopy-ram.c
> > +++ b/migration/postcopy-ram.c
> > @@ -1909,7 +1909,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
> >       * The new loading channel has its own threads, so it needs to be
> >       * blocked too.  It's by default true, just be explicit.
> >       */
> > -    qemu_file_set_blocking(file, true);
> > +    qemu_file_set_blocking(file, true, &error_abort);
> >      mis->postcopy_qemufile_dst = file;
> >      qemu_sem_post(&mis->postcopy_qemufile_dst_done);
> >      trace_postcopy_preempt_new_channel();
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index d5c6e7ec61..0f4280df21 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -888,9 +888,9 @@ void qemu_put_counted_string(QEMUFile *f, const char *str)
> >   *       both directions, and thus changing the blocking on the main
> >   *       QEMUFile can also affect the return path.
> >   */
> > -void qemu_file_set_blocking(QEMUFile *f, bool block)
> > +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp)
> >  {
> > -    qio_channel_set_blocking(f->ioc, block, NULL);
> > +    return qio_channel_set_blocking(f->ioc, block, errp);
> >  }
> >  
> >  /*
> > diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> > index f5b9f430e0..c13c967167 100644
> > --- a/migration/qemu-file.h
> > +++ b/migration/qemu-file.h
> > @@ -71,7 +71,7 @@ void qemu_file_set_error(QEMUFile *f, int ret);
> >  int qemu_file_shutdown(QEMUFile *f);
> >  QEMUFile *qemu_file_get_return_path(QEMUFile *f);
> >  int qemu_fflush(QEMUFile *f);
> > -void qemu_file_set_blocking(QEMUFile *f, bool block);
> > +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp);
> >  int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
> >  void qemu_set_offset(QEMUFile *f, off_t off, int whence);
> >  off_t qemu_get_offset(QEMUFile *f);
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index fabbeb296a..abe0547f9b 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -2095,7 +2095,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >       * Because we're a thread and not a coroutine we can't yield
> >       * in qemu_file, and thus we must be blocking now.
> >       */
> > -    qemu_file_set_blocking(f, true);
> > +    qemu_file_set_blocking(f, true, &error_fatal);
> >  
> >      /* TODO: sanity check that only postcopiable data will be loaded here */
> >      load_res = qemu_loadvm_state_main(f, mis);
> > @@ -2108,7 +2108,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >      f = mis->from_src_file;
> >  
> >      /* And non-blocking again so we don't block in any cleanup */
> > -    qemu_file_set_blocking(f, false);
> > +    qemu_file_set_blocking(f, false, &error_fatal);
> >  
> >      trace_postcopy_ram_listen_thread_exit();
> >      if (load_res < 0) {
> > -- 
> > 2.48.1
> > 
> 
> -- 
> Peter Xu
> 

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Vladimir Sementsov-Ogievskiy 1 week, 5 days ago
On 16.09.25 11:28, Daniel P. Berrangé wrote:
> On Mon, Sep 15, 2025 at 04:18:58PM -0400, Peter Xu wrote:
>> On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>> qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
>>> so let's passthrough the errp.
>> This looks all reasonable in general.
>>
>> Said that, using error_abort in migration code normally are not suggested
>> because it's too strong.
> Note, that prior to this series, the existing qemu_socket_set_nonblock
> method that migration is calling will assert on failure. This series
> removes that assert and propagates it back to the callers to let them
> decide what to do. Ideally they would gracefully handle it, but if
> they assert that is no worse than current behaviour.
> 

In details, prior to series:

posix + set_nonblock -> crash on failure

other variants (posix/win32 + set_block, win32 + set_nonblock) -> ignore failure

>> I did check all of below should be on the incoming side which is not as
>> severe (because killing dest qemu before switchover is normally
>> benign). Still, can we switch all below users to error_warn (including the
>> one below that may want to error_report_err(), IMHO a warn report is fine
>> even for such error)?
> IMHO ignoring a failure to change the blocking flag status is not
> a warnnig, it is unrecoverable for the migration operation. It
> should be possible to propagate the error in some way, but it will
> potentially require changes across multiple migration methods to
> handle this.


-- 
Best regards,
Vladimir

Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Peter Xu 1 week, 5 days ago
On Tue, Sep 16, 2025 at 04:01:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 16.09.25 11:28, Daniel P. Berrangé wrote:
> > On Mon, Sep 15, 2025 at 04:18:58PM -0400, Peter Xu wrote:
> > > On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
> > > > so let's passthrough the errp.
> > > This looks all reasonable in general.
> > > 
> > > Said that, using error_abort in migration code normally are not suggested
> > > because it's too strong.
> > Note, that prior to this series, the existing qemu_socket_set_nonblock
> > method that migration is calling will assert on failure. This series
> > removes that assert and propagates it back to the callers to let them
> > decide what to do. Ideally they would gracefully handle it, but if
> > they assert that is no worse than current behaviour.
> > 
> 
> In details, prior to series:
> 
> posix + set_nonblock -> crash on failure
> 
> other variants (posix/win32 + set_block, win32 + set_nonblock) -> ignore failure

Correct, but IIUC that's for sockets only.

Major channel types that migration cares the most should also include file
now.  qio_channel_file_set_blocking() also doesn't assert but return a
failure.

> 
> > > I did check all of below should be on the incoming side which is not as
> > > severe (because killing dest qemu before switchover is normally
> > > benign). Still, can we switch all below users to error_warn (including the
> > > one below that may want to error_report_err(), IMHO a warn report is fine
> > > even for such error)?
> > IMHO ignoring a failure to change the blocking flag status is not
> > a warnnig, it is unrecoverable for the migration operation. It
> > should be possible to propagate the error in some way, but it will
> > potentially require changes across multiple migration methods to
> > handle this.

In most cases I agree.  But still, using error_abort doesn't mean to fail
migration, but to crash the VM.  We still at least doesn't want to do it on
src..

Meanwhile, this could violate things like newly introduced exit-on-error,
but I agree we used to ignore those, so even if it fails before and didn't
crash, we could have ignored those errors.. and not reportable to libvirt.

The ideal way to do is to always fail either src/dst when set blocking
failed for sure, but yes, it's slightly involved on some paths this patch
touched.

So.. I think we can go with this patch, with a sincere wish that it'll
simply almost never fail.  But then, let's mention that in the commit
message, (1) this patch only asserts on the dest qemu and only before
switchover (hence src can still fallback), never src, (2) state the facts
that it so far is a slight violation to exit-on-error, but it's extremely
unlikely to happen anyway (NOTE: this is not a programming error that
normal assertions would do, so it falls into exit-on-error category).

Thanks,

-- 
Peter Xu


Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Daniel P. Berrangé 1 week, 5 days ago
On Tue, Sep 16, 2025 at 09:51:16AM -0400, Peter Xu wrote:
> On Tue, Sep 16, 2025 at 04:01:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > On 16.09.25 11:28, Daniel P. Berrangé wrote:
> > > On Mon, Sep 15, 2025 at 04:18:58PM -0400, Peter Xu wrote:
> > > > On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > > qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
> > > > > so let's passthrough the errp.
> > > > This looks all reasonable in general.
> > > > 
> > > > Said that, using error_abort in migration code normally are not suggested
> > > > because it's too strong.
> > > Note, that prior to this series, the existing qemu_socket_set_nonblock
> > > method that migration is calling will assert on failure. This series
> > > removes that assert and propagates it back to the callers to let them
> > > decide what to do. Ideally they would gracefully handle it, but if
> > > they assert that is no worse than current behaviour.
> > > 
> > 
> > In details, prior to series:
> > 
> > posix + set_nonblock -> crash on failure
> > 
> > other variants (posix/win32 + set_block, win32 + set_nonblock) -> ignore failure
> 
> Correct, but IIUC that's for sockets only.

True, that'd be the QIOChannelSocket class

> Major channel types that migration cares the most should also include file
> now.  qio_channel_file_set_blocking() also doesn't assert but return a
> failure.

Yep, you're correct that QIOChannelFile won't currently abort.

> > > > I did check all of below should be on the incoming side which is not as
> > > > severe (because killing dest qemu before switchover is normally
> > > > benign). Still, can we switch all below users to error_warn (including the
> > > > one below that may want to error_report_err(), IMHO a warn report is fine
> > > > even for such error)?
> > > IMHO ignoring a failure to change the blocking flag status is not
> > > a warnnig, it is unrecoverable for the migration operation. It
> > > should be possible to propagate the error in some way, but it will
> > > potentially require changes across multiple migration methods to
> > > handle this.
> 
> In most cases I agree.  But still, using error_abort doesn't mean to fail
> migration, but to crash the VM.  We still at least doesn't want to do it on
> src..

Yep, I do agree that it is dangerous to have the error_abort lurking
in there, as it is a trap-door for the future.

> Meanwhile, this could violate things like newly introduced exit-on-error,
> but I agree we used to ignore those, so even if it fails before and didn't
> crash, we could have ignored those errors.. and not reportable to libvirt.
> 
> The ideal way to do is to always fail either src/dst when set blocking
> failed for sure, but yes, it's slightly involved on some paths this patch
> touched.
> 
> So.. I think we can go with this patch, with a sincere wish that it'll
> simply almost never fail.  But then, let's mention that in the commit
> message, (1) this patch only asserts on the dest qemu and only before
> switchover (hence src can still fallback), never src, (2) state the facts
> that it so far is a slight violation to exit-on-error, but it's extremely
> unlikely to happen anyway (NOTE: this is not a programming error that
> normal assertions would do, so it falls into exit-on-error category).

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH v4 05/12] migration: qemu_file_set_blocking(): add errp parameter
Posted by Vladimir Sementsov-Ogievskiy 1 week, 5 days ago
On 15.09.25 23:18, Peter Xu wrote:
> On Mon, Sep 15, 2025 at 10:30:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> qemu_file_set_blocking() is a wrapper on qio_channel_set_blocking(),
>> so let's passthrough the errp.
> 
> This looks all reasonable in general.
> 
> Said that, using error_abort in migration code normally are not suggested
> because it's too strong.
> 
> I did check all of below should be on the incoming side which is not as
> severe (because killing dest qemu before switchover is normally
> benign). Still, can we switch all below users to error_warn (including the
> one below that may want to error_report_err(), IMHO a warn report is fine
> even for such error)?

If we failed to change blocking status of fd to what we want, I thought,
we can't simply continue execute further logic, it just will not work
as expected anyway?

> 
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   migration/colo.c         | 5 ++++-
>>   migration/migration.c    | 8 +++++---
>>   migration/postcopy-ram.c | 2 +-
>>   migration/qemu-file.c    | 4 ++--
>>   migration/qemu-file.h    | 2 +-
>>   migration/savevm.c       | 4 ++--
>>   6 files changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index e0f713c837..cf4d71d9ed 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -859,7 +859,10 @@ static void *colo_process_incoming_thread(void *opaque)
>>        * coroutine, and here we are in the COLO incoming thread, so it is ok to
>>        * set the fd back to blocked.
>>        */
>> -    qemu_file_set_blocking(mis->from_src_file, true);
>> +    if (!qemu_file_set_blocking(mis->from_src_file, true, &local_err)) {
>> +        error_report_err(local_err);
>> +        goto out;
>> +    }
>>   
>>       colo_incoming_start_dirty_log();
>>   
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 10c216d25d..e1ac4d73c2 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -951,7 +951,7 @@ static void migration_incoming_setup(QEMUFile *f)
>>   
>>       assert(!mis->from_src_file);
>>       mis->from_src_file = f;
>> -    qemu_file_set_blocking(f, false);
>> +    qemu_file_set_blocking(f, false, &error_abort);
>>   }
>>   
>>   void migration_incoming_process(void)
>> @@ -971,7 +971,7 @@ static bool postcopy_try_recover(void)
>>           /* This should be set already in migration_incoming_setup() */
>>           assert(mis->from_src_file);
>>           /* Postcopy has standalone thread to do vm load */
>> -        qemu_file_set_blocking(mis->from_src_file, true);
>> +        qemu_file_set_blocking(mis->from_src_file, true, &error_abort);
>>   
>>           /* Re-configure the return path */
>>           mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
>> @@ -4002,7 +4002,9 @@ void migration_connect(MigrationState *s, Error *error_in)
>>       }
>>   
>>       migration_rate_set(rate_limit);
>> -    qemu_file_set_blocking(s->to_dst_file, true);
>> +    if (!qemu_file_set_blocking(s->to_dst_file, true, &local_err)) {
>> +        goto fail;
>> +    }
>>   
>>       /*
>>        * Open the return path. For postcopy, it is used exclusively. For
>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>> index 45af9a361e..0172172343 100644
>> --- a/migration/postcopy-ram.c
>> +++ b/migration/postcopy-ram.c
>> @@ -1909,7 +1909,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
>>        * The new loading channel has its own threads, so it needs to be
>>        * blocked too.  It's by default true, just be explicit.
>>        */
>> -    qemu_file_set_blocking(file, true);
>> +    qemu_file_set_blocking(file, true, &error_abort);
>>       mis->postcopy_qemufile_dst = file;
>>       qemu_sem_post(&mis->postcopy_qemufile_dst_done);
>>       trace_postcopy_preempt_new_channel();
>> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
>> index d5c6e7ec61..0f4280df21 100644
>> --- a/migration/qemu-file.c
>> +++ b/migration/qemu-file.c
>> @@ -888,9 +888,9 @@ void qemu_put_counted_string(QEMUFile *f, const char *str)
>>    *       both directions, and thus changing the blocking on the main
>>    *       QEMUFile can also affect the return path.
>>    */
>> -void qemu_file_set_blocking(QEMUFile *f, bool block)
>> +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp)
>>   {
>> -    qio_channel_set_blocking(f->ioc, block, NULL);
>> +    return qio_channel_set_blocking(f->ioc, block, errp);
>>   }
>>   
>>   /*
>> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
>> index f5b9f430e0..c13c967167 100644
>> --- a/migration/qemu-file.h
>> +++ b/migration/qemu-file.h
>> @@ -71,7 +71,7 @@ void qemu_file_set_error(QEMUFile *f, int ret);
>>   int qemu_file_shutdown(QEMUFile *f);
>>   QEMUFile *qemu_file_get_return_path(QEMUFile *f);
>>   int qemu_fflush(QEMUFile *f);
>> -void qemu_file_set_blocking(QEMUFile *f, bool block);
>> +bool qemu_file_set_blocking(QEMUFile *f, bool block, Error **errp);
>>   int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
>>   void qemu_set_offset(QEMUFile *f, off_t off, int whence);
>>   off_t qemu_get_offset(QEMUFile *f);
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index fabbeb296a..abe0547f9b 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -2095,7 +2095,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>>        * Because we're a thread and not a coroutine we can't yield
>>        * in qemu_file, and thus we must be blocking now.
>>        */
>> -    qemu_file_set_blocking(f, true);
>> +    qemu_file_set_blocking(f, true, &error_fatal);
>>   
>>       /* TODO: sanity check that only postcopiable data will be loaded here */
>>       load_res = qemu_loadvm_state_main(f, mis);
>> @@ -2108,7 +2108,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>>       f = mis->from_src_file;
>>   
>>       /* And non-blocking again so we don't block in any cleanup */
>> -    qemu_file_set_blocking(f, false);
>> +    qemu_file_set_blocking(f, false, &error_fatal);
>>   
>>       trace_postcopy_ram_listen_thread_exit();
>>       if (load_res < 0) {
>> -- 
>> 2.48.1
>>
> 


-- 
Best regards,
Vladimir