[PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()

Cédric Le Goater posted 14 patches 9 months, 3 weeks ago
Maintainers: Stefano Stabellini <sstabellini@kernel.org>, Anthony Perard <anthony.perard@citrix.com>, Paul Durrant <paul@xen.org>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Nicholas Piggin <npiggin@gmail.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, David Gibson <david@gibson.dropbear.id.au>, Harsh Prateek Bora <harshpb@linux.ibm.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, David Hildenbrand <david@redhat.com>, Ilya Leoshkevich <iii@linux.ibm.com>, Thomas Huth <thuth@redhat.com>, Alex Williamson <alex.williamson@redhat.com>, Peter Xu <peterx@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Fabiano Rosas <farosas@suse.de>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, Eric Blake <eblake@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, John Snow <jsnow@redhat.com>, Hyman Huang <yong.huang@smartx.com>
There is a newer version of this series
[PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()
Posted by Cédric Le Goater 9 months, 3 weeks ago
Add an Error** argument to vfio_migration_set_state() and adjust
callers, including vfio_save_setup(). The error will be propagated up
to qemu_savevm_state_setup() where the save_setup() handler is
executed.

Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 hw/vfio/migration.c | 62 +++++++++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
 
 static int vfio_migration_set_state(VFIODevice *vbasedev,
                                     enum vfio_device_mig_state new_state,
-                                    enum vfio_device_mig_state recover_state)
+                                    enum vfio_device_mig_state recover_state,
+                                    Error **errp)
 {
     VFIOMigration *migration = vbasedev->migration;
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
         ret = -errno;
 
         if (recover_state == VFIO_DEVICE_STATE_ERROR) {
-            error_report("%s: Failed setting device state to %s, err: %s. "
-                         "Recover state is ERROR. Resetting device",
-                         vbasedev->name, mig_state_to_str(new_state),
-                         strerror(errno));
+            error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
+                       "Recover state is ERROR. Resetting device",
+                       vbasedev->name, mig_state_to_str(new_state),
+                       strerror(errno));
 
             goto reset_device;
         }
 
-        error_report(
+        error_setg(errp,
             "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
                      vbasedev->name, mig_state_to_str(new_state),
                      strerror(errno), mig_state_to_str(recover_state));
@@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
         mig_state->device_state = recover_state;
         if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
             ret = -errno;
-            error_report(
+            error_setg(errp,
                 "%s: Failed setting device in recover state, err: %s. Resetting device",
                          vbasedev->name, strerror(errno));
 
@@ -139,7 +140,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
              * This can happen if the device is asynchronously reset and
              * terminates a data transfer.
              */
-            error_report("%s: data_fd out of sync", vbasedev->name);
+            error_setg(errp, "%s: data_fd out of sync", vbasedev->name);
             close(mig_state->data_fd);
 
             return -EBADF;
@@ -170,10 +171,11 @@ reset_device:
  */
 static int
 vfio_migration_set_state_or_reset(VFIODevice *vbasedev,
-                                  enum vfio_device_mig_state new_state)
+                                  enum vfio_device_mig_state new_state,
+                                  Error **errp)
 {
     return vfio_migration_set_state(vbasedev, new_state,
-                                    VFIO_DEVICE_STATE_ERROR);
+                                    VFIO_DEVICE_STATE_ERROR, errp);
 }
 
 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
@@ -391,8 +393,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
                                       stop_copy_size);
     migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
     if (!migration->data_buffer) {
-        error_report("%s: Failed to allocate migration data buffer",
-                     vbasedev->name);
+        error_setg(errp, "%s: Failed to allocate migration data buffer",
+                   vbasedev->name);
         return -ENOMEM;
     }
 
@@ -402,7 +404,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
         switch (migration->device_state) {
         case VFIO_DEVICE_STATE_RUNNING:
             ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY,
-                                           VFIO_DEVICE_STATE_RUNNING);
+                                           VFIO_DEVICE_STATE_RUNNING, errp);
             if (ret) {
                 return ret;
             }
@@ -429,13 +431,18 @@ static void vfio_save_cleanup(void *opaque)
 {
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
+    Error *local_err = NULL;
 
     /*
      * Changing device state from STOP_COPY to STOP can take time. Do it here,
      * after migration has completed, so it won't increase downtime.
      */
     if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
-        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP);
+        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP,
+                                          &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
     }
 
     g_free(migration->data_buffer);
@@ -541,11 +548,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     VFIODevice *vbasedev = opaque;
     ssize_t data_size;
     int ret;
+    Error *local_err = NULL;
 
     /* We reach here with device state STOP or STOP_COPY only */
     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
-                                   VFIO_DEVICE_STATE_STOP);
-    if (ret) {
+                                   VFIO_DEVICE_STATE_STOP, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
         return ret;
     }
 
@@ -585,7 +594,7 @@ static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
     VFIODevice *vbasedev = opaque;
 
     return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
-                                   vbasedev->migration->device_state);
+                                    vbasedev->migration->device_state, errp);
 }
 
 static int vfio_load_cleanup(void *opaque)
@@ -701,20 +710,22 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
     VFIODevice *vbasedev = opaque;
     VFIOMigration *migration = vbasedev->migration;
     enum vfio_device_mig_state new_state;
+    Error *local_err = NULL;
     int ret;
 
     new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ?
                     VFIO_DEVICE_STATE_PRE_COPY_P2P :
                     VFIO_DEVICE_STATE_RUNNING_P2P;
 
-    ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
+    ret = vfio_migration_set_state_or_reset(vbasedev, new_state, &local_err);
     if (ret) {
         /*
          * Migration should be aborted in this case, but vm_state_notify()
          * currently does not support reporting failures.
          */
         if (migrate_get_current()->to_dst_file) {
-            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+            qemu_file_set_error_obj(migrate_get_current()->to_dst_file, ret,
+                                    local_err);
         }
     }
 
@@ -727,6 +738,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 {
     VFIODevice *vbasedev = opaque;
     enum vfio_device_mig_state new_state;
+    Error *local_err = NULL;
     int ret;
 
     if (running) {
@@ -739,14 +751,15 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
                 VFIO_DEVICE_STATE_STOP;
     }
 
-    ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
+    ret = vfio_migration_set_state_or_reset(vbasedev, new_state, &local_err);
     if (ret) {
         /*
          * Migration should be aborted in this case, but vm_state_notify()
          * currently does not support reporting failures.
          */
         if (migrate_get_current()->to_dst_file) {
-            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+            qemu_file_set_error_obj(migrate_get_current()->to_dst_file, ret,
+                                    local_err);
         }
     }
 
@@ -760,6 +773,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
     VFIOMigration *migration = container_of(notifier, VFIOMigration,
                                             migration_state);
     VFIODevice *vbasedev = migration->vbasedev;
+    Error *local_err = NULL;
 
     trace_vfio_migration_state_notifier(vbasedev->name,
                                         MigrationStatus_str(s->state));
@@ -768,7 +782,11 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
     case MIGRATION_STATUS_CANCELLING:
     case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_FAILED:
-        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING);
+        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING,
+                                          &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
     }
 }
 
-- 
2.43.0


Re: [PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()
Posted by Avihai Horon 9 months, 2 weeks ago
Hi Cedric,

On 07/02/2024 15:33, Cédric Le Goater wrote:
> External email: Use caution opening links or attachments
>
>
> Add an Error** argument to vfio_migration_set_state() and adjust
> callers, including vfio_save_setup(). The error will be propagated up
> to qemu_savevm_state_setup() where the save_setup() handler is
> executed.
>
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>   hw/vfio/migration.c | 62 +++++++++++++++++++++++++++++----------------
>   1 file changed, 40 insertions(+), 22 deletions(-)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>
>   static int vfio_migration_set_state(VFIODevice *vbasedev,
>                                       enum vfio_device_mig_state new_state,
> -                                    enum vfio_device_mig_state recover_state)
> +                                    enum vfio_device_mig_state recover_state,
> +                                    Error **errp)
>   {
>       VFIOMigration *migration = vbasedev->migration;
>       uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> @@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>           ret = -errno;
>
>           if (recover_state == VFIO_DEVICE_STATE_ERROR) {
> -            error_report("%s: Failed setting device state to %s, err: %s. "
> -                         "Recover state is ERROR. Resetting device",
> -                         vbasedev->name, mig_state_to_str(new_state),
> -                         strerror(errno));
> +            error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
> +                       "Recover state is ERROR. Resetting device",
> +                       vbasedev->name, mig_state_to_str(new_state),
> +                       strerror(errno));
>
>               goto reset_device;
>           }
>
> -        error_report(
> +        error_setg(errp,
>               "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
>                        vbasedev->name, mig_state_to_str(new_state),
>                        strerror(errno), mig_state_to_str(recover_state));
> @@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>           mig_state->device_state = recover_state;
>           if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
>               ret = -errno;
> -            error_report(
> +            error_setg(errp,
>                   "%s: Failed setting device in recover state, err: %s. Resetting device",
>                            vbasedev->name, strerror(errno));

I think here we will assert because errp is already set.

Adding an error_append() API would be useful here I guess.
Otherwise, we need to move the first error_setg() below, to before we 
return from a successful recover state change, and construct the error 
message differently (e.g., provide a full error message for the recover 
state fail case containing also the first error).

Do you have other ideas?

Thanks.

>
> @@ -139,7 +140,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>                * This can happen if the device is asynchronously reset and
>                * terminates a data transfer.
>                */
> -            error_report("%s: data_fd out of sync", vbasedev->name);
> +            error_setg(errp, "%s: data_fd out of sync", vbasedev->name);
>               close(mig_state->data_fd);
>
>               return -EBADF;
> @@ -170,10 +171,11 @@ reset_device:
>    */
>   static int
>   vfio_migration_set_state_or_reset(VFIODevice *vbasedev,
> -                                  enum vfio_device_mig_state new_state)
> +                                  enum vfio_device_mig_state new_state,
> +                                  Error **errp)
>   {
>       return vfio_migration_set_state(vbasedev, new_state,
> -                                    VFIO_DEVICE_STATE_ERROR);
> +                                    VFIO_DEVICE_STATE_ERROR, errp);
>   }
>
>   static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
> @@ -391,8 +393,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>                                         stop_copy_size);
>       migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
>       if (!migration->data_buffer) {
> -        error_report("%s: Failed to allocate migration data buffer",
> -                     vbasedev->name);
> +        error_setg(errp, "%s: Failed to allocate migration data buffer",
> +                   vbasedev->name);
>           return -ENOMEM;
>       }
>
> @@ -402,7 +404,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>           switch (migration->device_state) {
>           case VFIO_DEVICE_STATE_RUNNING:
>               ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY,
> -                                           VFIO_DEVICE_STATE_RUNNING);
> +                                           VFIO_DEVICE_STATE_RUNNING, errp);
>               if (ret) {
>                   return ret;
>               }
> @@ -429,13 +431,18 @@ static void vfio_save_cleanup(void *opaque)
>   {
>       VFIODevice *vbasedev = opaque;
>       VFIOMigration *migration = vbasedev->migration;
> +    Error *local_err = NULL;
>
>       /*
>        * Changing device state from STOP_COPY to STOP can take time. Do it here,
>        * after migration has completed, so it won't increase downtime.
>        */
>       if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
> -        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP);
> +        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP,
> +                                          &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +        }
>       }
>
>       g_free(migration->data_buffer);
> @@ -541,11 +548,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>       VFIODevice *vbasedev = opaque;
>       ssize_t data_size;
>       int ret;
> +    Error *local_err = NULL;
>
>       /* We reach here with device state STOP or STOP_COPY only */
>       ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> -                                   VFIO_DEVICE_STATE_STOP);
> -    if (ret) {
> +                                   VFIO_DEVICE_STATE_STOP, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
>           return ret;
>       }
>
> @@ -585,7 +594,7 @@ static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
>       VFIODevice *vbasedev = opaque;
>
>       return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
> -                                   vbasedev->migration->device_state);
> +                                    vbasedev->migration->device_state, errp);
>   }
>
>   static int vfio_load_cleanup(void *opaque)
> @@ -701,20 +710,22 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
>       VFIODevice *vbasedev = opaque;
>       VFIOMigration *migration = vbasedev->migration;
>       enum vfio_device_mig_state new_state;
> +    Error *local_err = NULL;
>       int ret;
>
>       new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ?
>                       VFIO_DEVICE_STATE_PRE_COPY_P2P :
>                       VFIO_DEVICE_STATE_RUNNING_P2P;
>
> -    ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
> +    ret = vfio_migration_set_state_or_reset(vbasedev, new_state, &local_err);
>       if (ret) {
>           /*
>            * Migration should be aborted in this case, but vm_state_notify()
>            * currently does not support reporting failures.
>            */
>           if (migrate_get_current()->to_dst_file) {
> -            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
> +            qemu_file_set_error_obj(migrate_get_current()->to_dst_file, ret,
> +                                    local_err);
>           }
>       }
>
> @@ -727,6 +738,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
>   {
>       VFIODevice *vbasedev = opaque;
>       enum vfio_device_mig_state new_state;
> +    Error *local_err = NULL;
>       int ret;
>
>       if (running) {
> @@ -739,14 +751,15 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
>                   VFIO_DEVICE_STATE_STOP;
>       }
>
> -    ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
> +    ret = vfio_migration_set_state_or_reset(vbasedev, new_state, &local_err);
>       if (ret) {
>           /*
>            * Migration should be aborted in this case, but vm_state_notify()
>            * currently does not support reporting failures.
>            */
>           if (migrate_get_current()->to_dst_file) {
> -            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
> +            qemu_file_set_error_obj(migrate_get_current()->to_dst_file, ret,
> +                                    local_err);
>           }
>       }
>
> @@ -760,6 +773,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
>       VFIOMigration *migration = container_of(notifier, VFIOMigration,
>                                               migration_state);
>       VFIODevice *vbasedev = migration->vbasedev;
> +    Error *local_err = NULL;
>
>       trace_vfio_migration_state_notifier(vbasedev->name,
>                                           MigrationStatus_str(s->state));
> @@ -768,7 +782,11 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
>       case MIGRATION_STATUS_CANCELLING:
>       case MIGRATION_STATUS_CANCELLED:
>       case MIGRATION_STATUS_FAILED:
> -        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING);
> +        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING,
> +                                          &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +        }
>       }
>   }
>
> --
> 2.43.0
>
>

Re: [PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()
Posted by Cédric Le Goater 9 months, 2 weeks ago
On 2/12/24 10:17, Avihai Horon wrote:
> Hi Cedric,
> 
> On 07/02/2024 15:33, Cédric Le Goater wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> Add an Error** argument to vfio_migration_set_state() and adjust
>> callers, including vfio_save_setup(). The error will be propagated up
>> to qemu_savevm_state_setup() where the save_setup() handler is
>> executed.
>>
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>>   hw/vfio/migration.c | 62 +++++++++++++++++++++++++++++----------------
>>   1 file changed, 40 insertions(+), 22 deletions(-)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
>>
>>   static int vfio_migration_set_state(VFIODevice *vbasedev,
>>                                       enum vfio_device_mig_state new_state,
>> -                                    enum vfio_device_mig_state recover_state)
>> +                                    enum vfio_device_mig_state recover_state,
>> +                                    Error **errp)
>>   {
>>       VFIOMigration *migration = vbasedev->migration;
>>       uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
>> @@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>>           ret = -errno;
>>
>>           if (recover_state == VFIO_DEVICE_STATE_ERROR) {
>> -            error_report("%s: Failed setting device state to %s, err: %s. "
>> -                         "Recover state is ERROR. Resetting device",
>> -                         vbasedev->name, mig_state_to_str(new_state),
>> -                         strerror(errno));
>> +            error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
>> +                       "Recover state is ERROR. Resetting device",
>> +                       vbasedev->name, mig_state_to_str(new_state),
>> +                       strerror(errno));
>>
>>               goto reset_device;
>>           }
>>
>> -        error_report(
>> +        error_setg(errp,
>>               "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
>>                        vbasedev->name, mig_state_to_str(new_state),
>>                        strerror(errno), mig_state_to_str(recover_state));
>> @@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>>           mig_state->device_state = recover_state;
>>           if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
>>               ret = -errno;
>> -            error_report(
>> +            error_setg(errp,
>>                   "%s: Failed setting device in recover state, err: %s. Resetting device",
>>                            vbasedev->name, strerror(errno));
> 
> I think here we will assert because errp is already set.
> 
> Adding an error_append() API would be useful here I guess.

yes.

> Otherwise, we need to move the first error_setg() below, to before we return from a successful recover state change, and construct the error message differently (e.g., provide a full error message for the recover state fail case containing also the first error).
> 
> Do you have other ideas?

Errors for :

     if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {

should be treated as the others with and error_append() and not
hw_error(). This needs a rework before any new changes.

I also wonder why we have twice :

     migration->device_state = recover_state;

It looks redundant. The ioctl VFIO_DEVICE_FEATURE should leave the
state unmodified.

Thanks,

C.



Re: [PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()
Posted by Avihai Horon 9 months, 2 weeks ago
On 12/02/2024 19:54, Cédric Le Goater wrote:
> External email: Use caution opening links or attachments
>
>
> On 2/12/24 10:17, Avihai Horon wrote:
>> Hi Cedric,
>>
>> On 07/02/2024 15:33, Cédric Le Goater wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Add an Error** argument to vfio_migration_set_state() and adjust
>>> callers, including vfio_save_setup(). The error will be propagated up
>>> to qemu_savevm_state_setup() where the save_setup() handler is
>>> executed.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>>> ---
>>>   hw/vfio/migration.c | 62 
>>> +++++++++++++++++++++++++++++----------------
>>>   1 file changed, 40 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 
>>> 2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab 
>>> 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum 
>>> vfio_device_mig_state state)
>>>
>>>   static int vfio_migration_set_state(VFIODevice *vbasedev,
>>>                                       enum vfio_device_mig_state 
>>> new_state,
>>> -                                    enum vfio_device_mig_state 
>>> recover_state)
>>> +                                    enum vfio_device_mig_state 
>>> recover_state,
>>> +                                    Error **errp)
>>>   {
>>>       VFIOMigration *migration = vbasedev->migration;
>>>       uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
>>> @@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice 
>>> *vbasedev,
>>>           ret = -errno;
>>>
>>>           if (recover_state == VFIO_DEVICE_STATE_ERROR) {
>>> -            error_report("%s: Failed setting device state to %s, 
>>> err: %s. "
>>> -                         "Recover state is ERROR. Resetting device",
>>> -                         vbasedev->name, mig_state_to_str(new_state),
>>> -                         strerror(errno));
>>> +            error_setg(errp, "%s: Failed setting device state to 
>>> %s, err: %s. "
>>> +                       "Recover state is ERROR. Resetting device",
>>> +                       vbasedev->name, mig_state_to_str(new_state),
>>> +                       strerror(errno));
>>>
>>>               goto reset_device;
>>>           }
>>>
>>> -        error_report(
>>> +        error_setg(errp,
>>>               "%s: Failed setting device state to %s, err: %s. 
>>> Setting device in recover state %s",
>>>                        vbasedev->name, mig_state_to_str(new_state),
>>>                        strerror(errno), 
>>> mig_state_to_str(recover_state));
>>> @@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice 
>>> *vbasedev,
>>>           mig_state->device_state = recover_state;
>>>           if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
>>>               ret = -errno;
>>> -            error_report(
>>> +            error_setg(errp,
>>>                   "%s: Failed setting device in recover state, err: 
>>> %s. Resetting device",
>>>                            vbasedev->name, strerror(errno));
>>
>> I think here we will assert because errp is already set.
>>
>> Adding an error_append() API would be useful here I guess.
>
> yes.
>
>> Otherwise, we need to move the first error_setg() below, to before we 
>> return from a successful recover state change, and construct the 
>> error message differently (e.g., provide a full error message for the 
>> recover state fail case containing also the first error).
>>
>> Do you have other ideas?
>
> Errors for :
>
>     if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {
>
> should be treated as the others with and error_append() and not
> hw_error(). This needs a rework before any new changes.
>
> I also wonder why we have twice :
>
>     migration->device_state = recover_state;

Not sure where you see we have it twice.

One time we set "mig_state->device_state = recover_state;" for the ioctl.
And another time we set "migration->device_state = recover_state;" in 
case of successful ioctl, to update the device state in migration struct.

>
> It looks redundant. The ioctl VFIO_DEVICE_FEATURE should leave the
> state unmodified.
>
> Thanks,
>
> C.
>
>

Re: [PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()
Posted by Philippe Mathieu-Daudé 9 months, 3 weeks ago
On 7/2/24 14:33, Cédric Le Goater wrote:
> Add an Error** argument to vfio_migration_set_state() and adjust
> callers, including vfio_save_setup(). The error will be propagated up
> to qemu_savevm_state_setup() where the save_setup() handler is
> executed.
> 
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>   hw/vfio/migration.c | 62 +++++++++++++++++++++++++++++----------------
>   1 file changed, 40 insertions(+), 22 deletions(-)


> @@ -429,13 +431,18 @@ static void vfio_save_cleanup(void *opaque)
>   {
>       VFIODevice *vbasedev = opaque;
>       VFIOMigration *migration = vbasedev->migration;
> +    Error *local_err = NULL;
>   
>       /*
>        * Changing device state from STOP_COPY to STOP can take time. Do it here,
>        * after migration has completed, so it won't increase downtime.
>        */
>       if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
> -        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP);
> +        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP,
> +                                          &local_err);
> +        if (local_err) {

Please check callee return value instead.

> +            error_report_err(local_err);
> +        }
>       }
>   
>       g_free(migration->data_buffer);
> @@ -541,11 +548,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>       VFIODevice *vbasedev = opaque;
>       ssize_t data_size;
>       int ret;
> +    Error *local_err = NULL;
>   
>       /* We reach here with device state STOP or STOP_COPY only */
>       ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> -                                   VFIO_DEVICE_STATE_STOP);
> -    if (ret) {
> +                                   VFIO_DEVICE_STATE_STOP, &local_err);
> +    if (local_err) {

Ditto.

> +        error_report_err(local_err);
>           return ret;
>       }


> @@ -760,6 +773,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
>       VFIOMigration *migration = container_of(notifier, VFIOMigration,
>                                               migration_state);
>       VFIODevice *vbasedev = migration->vbasedev;
> +    Error *local_err = NULL;
>   
>       trace_vfio_migration_state_notifier(vbasedev->name,
>                                           MigrationStatus_str(s->state));
> @@ -768,7 +782,11 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
>       case MIGRATION_STATUS_CANCELLING:
>       case MIGRATION_STATUS_CANCELLED:
>       case MIGRATION_STATUS_FAILED:
> -        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING);
> +        vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING,
> +                                          &local_err);
> +        if (local_err) {

Ditto.

> +            error_report_err(local_err);
> +        }
>       }
>   }
>