[PATCH v2 3/3] vhost-user: return failure if backend crash when live migration

Haoqian He posted 3 patches 2 weeks, 6 days ago
There is a newer version of this series
[PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Haoqian He 2 weeks, 6 days ago
Live migration should be terminated if the backend crashes before
the migration completes.

Since the vhost device will be stopped when VM is stopped before
the end of the live migration, current implementation if vhost
backend died, vhost device's set_status() will not return failure,
live migration won't perceive the disconnection between qemu and
vhost backend, inflight io would be submitted in migration target
host, leading to IO error.

To fix this issue:
1. Add set_status_ext() which has return value for
VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.

2. In set_status_ext(), return failure if the flag `connected`
is false or vhost_dev_stop return failure, which means qemu lost
connection with backend.

Hence migration_completion() will process failure, terminate
migration and restore VM.

Signed-off-by: Haoqian He <haoqian.he@smartx.com>
---
 hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
 hw/scsi/vhost-scsi-common.c           | 13 ++++++------
 hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
 hw/virtio/virtio.c                    | 20 +++++++++++++-----
 include/hw/virtio/vhost-scsi-common.h |  2 +-
 include/hw/virtio/virtio.h            |  1 +
 6 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index ae42327cf8..4865786c54 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -204,7 +204,7 @@ err_host_notifiers:
     return ret;
 }
 
-static void vhost_user_blk_stop(VirtIODevice *vdev)
+static int vhost_user_blk_stop(VirtIODevice *vdev)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
     int ret;
 
     if (!s->started_vu) {
-        return;
+        return 0;
     }
     s->started_vu = false;
 
     if (!k->set_guest_notifiers) {
-        return;
+        return 0;
     }
 
-    vhost_dev_stop(&s->dev, vdev, true);
+    ret = vhost_dev_stop(&s->dev, vdev, true);
 
-    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
-    if (ret < 0) {
+    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
         error_report("vhost guest notifier cleanup failed: %d", ret);
-        return;
+        return -1;
     }
 
     vhost_dev_disable_notifiers(&s->dev, vdev);
+    return ret;
 }
 
-static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
     bool should_start = virtio_device_should_start(vdev, status);
@@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
     int ret;
 
     if (!s->connected) {
-        return;
+        return -1;
     }
 
     if (vhost_dev_is_started(&s->dev) == should_start) {
-        return;
+        return 0;
     }
 
     if (should_start) {
@@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
             qemu_chr_fe_disconnect(&s->chardev);
         }
     } else {
-        vhost_user_blk_stop(vdev);
+        ret = vhost_user_blk_stop(vdev);
+        if (ret < 0) {
+            return ret;
+        }
     }
-
+    return 0;
 }
 
 static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
@@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
     vdc->get_config = vhost_user_blk_update_config;
     vdc->set_config = vhost_user_blk_set_config;
     vdc->get_features = vhost_user_blk_get_features;
-    vdc->set_status = vhost_user_blk_set_status;
+    vdc->set_status_ext = vhost_user_blk_set_status;
     vdc->reset = vhost_user_blk_reset;
     vdc->get_vhost = vhost_user_blk_get_vhost;
 }
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index 4c8637045d..43525ba46d 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -101,24 +101,25 @@ err_host_notifiers:
     return ret;
 }
 
-void vhost_scsi_common_stop(VHostSCSICommon *vsc)
+int vhost_scsi_common_stop(VHostSCSICommon *vsc)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     int ret = 0;
 
-    vhost_dev_stop(&vsc->dev, vdev, true);
+    ret = vhost_dev_stop(&vsc->dev, vdev, true);
 
     if (k->set_guest_notifiers) {
-        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
-        if (ret < 0) {
-                error_report("vhost guest notifier cleanup failed: %d", ret);
+        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
+        if (r < 0) {
+            error_report("vhost guest notifier cleanup failed: %d", ret);
+            return r;
         }
     }
-    assert(ret >= 0);
 
     vhost_dev_disable_notifiers(&vsc->dev, vdev);
+    return ret;
 }
 
 uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index adb41b9816..8e7efc38f2 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
     return ret;
 }
 
-static void vhost_user_scsi_stop(VHostUserSCSI *s)
+static int vhost_user_scsi_stop(VHostUserSCSI *s)
 {
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
 
     if (!s->started_vu) {
-        return;
+        return 0;
     }
     s->started_vu = false;
 
-    vhost_scsi_common_stop(vsc);
+    return vhost_scsi_common_stop(vsc);
 }
 
-static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
 {
     VHostUserSCSI *s = (VHostUserSCSI *)vdev;
     DeviceState *dev = DEVICE(vdev);
@@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
     int ret;
 
     if (!s->connected) {
-        return;
+        return -1;
     }
 
     if (vhost_dev_is_started(&vsc->dev) == should_start) {
-        return;
+        return 0;
     }
 
     if (should_start) {
@@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
             qemu_chr_fe_disconnect(&vs->conf.chardev);
         }
     } else {
-        vhost_user_scsi_stop(s);
+        ret = vhost_user_scsi_stop(s);
+        if (ret) {
+            return ret;
+        }
     }
+    return 0;
 }
 
 static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
     vdc->unrealize = vhost_user_scsi_unrealize;
     vdc->get_features = vhost_scsi_common_get_features;
     vdc->set_config = vhost_scsi_common_set_config;
-    vdc->set_status = vhost_user_scsi_set_status;
+    vdc->set_status_ext = vhost_user_scsi_set_status;
     fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
     vdc->reset = vhost_user_scsi_reset;
     vdc->get_vhost = vhost_user_scsi_get_vhost;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 5e8d4cab53..fff7cdb35d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
 {
     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
     trace_virtio_set_status(vdev, val);
+    int ret = 0;
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
             val & VIRTIO_CONFIG_S_FEATURES_OK) {
-            int ret = virtio_validate_features(vdev);
-
+            ret = virtio_validate_features(vdev);
             if (ret) {
                 return ret;
             }
@@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
     }
 
-    if (k->set_status) {
+    if (k->set_status_ext) {
+        ret = k->set_status_ext(vdev, val);
+        if (ret) {
+            qemu_log("set %s status to %d failed, old status: %d\n",
+                     vdev->name, val, vdev->status);
+        }
+    } else if (k->set_status) {
         k->set_status(vdev, val);
     }
     vdev->status = val;
 
-    return 0;
+    return ret;
 }
 
 static enum virtio_device_endian virtio_default_endian(void)
@@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool running, RunState state)
     }
 
     if (!backend_run) {
-        virtio_set_status(vdev, vdev->status);
+        // the return value was used for stopping VM during migration
+        int ret = virtio_set_status(vdev, vdev->status);
+        if (ret) {
+            return ret;
+        }
     }
     return 0;
 }
diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
index c5d2c09455..d54d9c916f 100644
--- a/include/hw/virtio/vhost-scsi-common.h
+++ b/include/hw/virtio/vhost-scsi-common.h
@@ -40,7 +40,7 @@ struct VHostSCSICommon {
 };
 
 int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
-void vhost_scsi_common_stop(VHostSCSICommon *vsc);
+int vhost_scsi_common_stop(VHostSCSICommon *vsc);
 char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
                                         DeviceState *dev);
 void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6386910280..c99d56f519 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -187,6 +187,7 @@ struct VirtioDeviceClass {
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
     /* Device must validate queue_index.  */
     void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
     /* Device must validate queue_index.  */
-- 
2.48.1
Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Stefano Garzarella 2 weeks, 1 day ago
On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:
>Live migration should be terminated if the backend crashes before
>the migration completes.
>
>Since the vhost device will be stopped when VM is stopped before
>the end of the live migration, current implementation if vhost
>backend died, vhost device's set_status() will not return failure,
>live migration won't perceive the disconnection between qemu and
>vhost backend, inflight io would be submitted in migration target
>host, leading to IO error.
>
>To fix this issue:
>1. Add set_status_ext() which has return value for
>VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>
>2. In set_status_ext(), return failure if the flag `connected`
>is false or vhost_dev_stop return failure, which means qemu lost
>connection with backend.
>
>Hence migration_completion() will process failure, terminate
>migration and restore VM.
>
>Signed-off-by: Haoqian He <haoqian.he@smartx.com>
>---
> hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
> hw/scsi/vhost-scsi-common.c           | 13 ++++++------
> hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
> hw/virtio/virtio.c                    | 20 +++++++++++++-----
> include/hw/virtio/vhost-scsi-common.h |  2 +-
> include/hw/virtio/virtio.h            |  1 +
> 6 files changed, 52 insertions(+), 33 deletions(-)
>
>diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>index ae42327cf8..4865786c54 100644
>--- a/hw/block/vhost-user-blk.c
>+++ b/hw/block/vhost-user-blk.c
>@@ -204,7 +204,7 @@ err_host_notifiers:
>     return ret;
> }
>
>-static void vhost_user_blk_stop(VirtIODevice *vdev)
>+static int vhost_user_blk_stop(VirtIODevice *vdev)
> {
>     VHostUserBlk *s = VHOST_USER_BLK(vdev);
>     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>@@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>     int ret;
>
>     if (!s->started_vu) {
>-        return;
>+        return 0;
>     }
>     s->started_vu = false;
>
>     if (!k->set_guest_notifiers) {
>-        return;
>+        return 0;
>     }
>
>-    vhost_dev_stop(&s->dev, vdev, true);
>+    ret = vhost_dev_stop(&s->dev, vdev, true);
>
>-    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>-    if (ret < 0) {
>+    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>         error_report("vhost guest notifier cleanup failed: %d", ret);
>-        return;
>+        return -1;
>     }
>
>     vhost_dev_disable_notifiers(&s->dev, vdev);
>+    return ret;
> }
>
>-static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>+static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> {
>     VHostUserBlk *s = VHOST_USER_BLK(vdev);
>     bool should_start = virtio_device_should_start(vdev, status);
>@@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>     int ret;
>
>     if (!s->connected) {
>-        return;
>+        return -1;
>     }
>
>     if (vhost_dev_is_started(&s->dev) == should_start) {
>-        return;
>+        return 0;
>     }
>
>     if (should_start) {
>@@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>             qemu_chr_fe_disconnect(&s->chardev);
>         }
>     } else {
>-        vhost_user_blk_stop(vdev);
>+        ret = vhost_user_blk_stop(vdev);
>+        if (ret < 0) {
>+            return ret;
>+        }
>     }
>-
>+    return 0;
> }
>
> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>@@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>     vdc->get_config = vhost_user_blk_update_config;
>     vdc->set_config = vhost_user_blk_set_config;
>     vdc->get_features = vhost_user_blk_get_features;
>-    vdc->set_status = vhost_user_blk_set_status;
>+    vdc->set_status_ext = vhost_user_blk_set_status;
>     vdc->reset = vhost_user_blk_reset;
>     vdc->get_vhost = vhost_user_blk_get_vhost;
> }
>diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>index 4c8637045d..43525ba46d 100644
>--- a/hw/scsi/vhost-scsi-common.c
>+++ b/hw/scsi/vhost-scsi-common.c
>@@ -101,24 +101,25 @@ err_host_notifiers:
>     return ret;
> }
>
>-void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>+int vhost_scsi_common_stop(VHostSCSICommon *vsc)
> {
>     VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>     int ret = 0;
>
>-    vhost_dev_stop(&vsc->dev, vdev, true);
>+    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>
>     if (k->set_guest_notifiers) {
>-        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>-        if (ret < 0) {
>-                error_report("vhost guest notifier cleanup failed: %d", ret);
>+        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>+        if (r < 0) {
>+            error_report("vhost guest notifier cleanup failed: %d", ret);

The variable `ret` in the error_report() seems wrong.

>+            return r;
>         }
>     }
>-    assert(ret >= 0);
>
>     vhost_dev_disable_notifiers(&vsc->dev, vdev);
>+    return ret;
> }
>
> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
>diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>index adb41b9816..8e7efc38f2 100644
>--- a/hw/scsi/vhost-user-scsi.c
>+++ b/hw/scsi/vhost-user-scsi.c
>@@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
>     return ret;
> }
>
>-static void vhost_user_scsi_stop(VHostUserSCSI *s)
>+static int vhost_user_scsi_stop(VHostUserSCSI *s)
> {
>     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>
>     if (!s->started_vu) {
>-        return;
>+        return 0;
>     }
>     s->started_vu = false;
>
>-    vhost_scsi_common_stop(vsc);
>+    return vhost_scsi_common_stop(vsc);
> }
>
>-static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>+static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
> {
>     VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>     DeviceState *dev = DEVICE(vdev);
>@@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>     int ret;
>
>     if (!s->connected) {
>-        return;
>+        return -1;
>     }
>
>     if (vhost_dev_is_started(&vsc->dev) == should_start) {
>-        return;
>+        return 0;
>     }
>
>     if (should_start) {
>@@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>             qemu_chr_fe_disconnect(&vs->conf.chardev);
>         }
>     } else {
>-        vhost_user_scsi_stop(s);
>+        ret = vhost_user_scsi_stop(s);
>+        if (ret) {
>+            return ret;
>+        }
>     }
>+    return 0;
> }
>
> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>@@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
>     vdc->unrealize = vhost_user_scsi_unrealize;
>     vdc->get_features = vhost_scsi_common_get_features;
>     vdc->set_config = vhost_scsi_common_set_config;
>-    vdc->set_status = vhost_user_scsi_set_status;
>+    vdc->set_status_ext = vhost_user_scsi_set_status;
>     fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>     vdc->reset = vhost_user_scsi_reset;
>     vdc->get_vhost = vhost_user_scsi_get_vhost;
>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>index 5e8d4cab53..fff7cdb35d 100644
>--- a/hw/virtio/virtio.c
>+++ b/hw/virtio/virtio.c
>@@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> {
>     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>     trace_virtio_set_status(vdev, val);
>+    int ret = 0;
>
>     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>             val & VIRTIO_CONFIG_S_FEATURES_OK) {
>-            int ret = virtio_validate_features(vdev);
>-
>+            ret = virtio_validate_features(vdev);
>             if (ret) {
>                 return ret;
>             }
>@@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>     }
>
>-    if (k->set_status) {
>+    if (k->set_status_ext) {
>+        ret = k->set_status_ext(vdev, val);
>+        if (ret) {
>+            qemu_log("set %s status to %d failed, old status: %d\n",
>+                     vdev->name, val, vdev->status);
>+        }
>+    } else if (k->set_status) {
>         k->set_status(vdev, val);
>     }
>     vdev->status = val;
>
>-    return 0;
>+    return ret;
> }
>
> static enum virtio_device_endian virtio_default_endian(void)
>@@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool running, RunState state)
>     }
>
>     if (!backend_run) {
>-        virtio_set_status(vdev, vdev->status);
>+        // the return value was used for stopping VM during migration

Can you elaborate a bit this comment?

>+        int ret = virtio_set_status(vdev, vdev->status);
>+        if (ret) {
>+            return ret;
>+        }
>     }
>     return 0;
> }
>diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
>index c5d2c09455..d54d9c916f 100644
>--- a/include/hw/virtio/vhost-scsi-common.h
>+++ b/include/hw/virtio/vhost-scsi-common.h
>@@ -40,7 +40,7 @@ struct VHostSCSICommon {
> };
>
> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
>-void vhost_scsi_common_stop(VHostSCSICommon *vsc);
>+int vhost_scsi_common_stop(VHostSCSICommon *vsc);
> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>                                         DeviceState *dev);
> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
>diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>index 6386910280..c99d56f519 100644
>--- a/include/hw/virtio/virtio.h
>+++ b/include/hw/virtio/virtio.h
>@@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>     void (*reset)(VirtIODevice *vdev);
>     void (*set_status)(VirtIODevice *vdev, uint8_t val);
>+    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);

Why we need a new callback instead having `set_status` returning int ?

Thanks,
Stefano

>     /* Device must validate queue_index.  */
>     void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
>     /* Device must validate queue_index.  */
>-- 
>2.48.1
>
Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Haoqian He 2 weeks ago

> 2025年3月19日 23:20,Stefano Garzarella <sgarzare@redhat.com> 写道:
> 
> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:
>> Live migration should be terminated if the backend crashes before
>> the migration completes.
>> 
>> Since the vhost device will be stopped when VM is stopped before
>> the end of the live migration, current implementation if vhost
>> backend died, vhost device's set_status() will not return failure,
>> live migration won't perceive the disconnection between qemu and
>> vhost backend, inflight io would be submitted in migration target
>> host, leading to IO error.
>> 
>> To fix this issue:
>> 1. Add set_status_ext() which has return value for
>> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>> 
>> 2. In set_status_ext(), return failure if the flag `connected`
>> is false or vhost_dev_stop return failure, which means qemu lost
>> connection with backend.
>> 
>> Hence migration_completion() will process failure, terminate
>> migration and restore VM.
>> 
>> Signed-off-by: Haoqian He <haoqian.he@smartx.com>
>> ---
>> hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
>> hw/scsi/vhost-scsi-common.c           | 13 ++++++------
>> hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
>> hw/virtio/virtio.c                    | 20 +++++++++++++-----
>> include/hw/virtio/vhost-scsi-common.h |  2 +-
>> include/hw/virtio/virtio.h            |  1 +
>> 6 files changed, 52 insertions(+), 33 deletions(-)
>> 
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index ae42327cf8..4865786c54 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -204,7 +204,7 @@ err_host_notifiers:
>>    return ret;
>> }
>> 
>> -static void vhost_user_blk_stop(VirtIODevice *vdev)
>> +static int vhost_user_blk_stop(VirtIODevice *vdev)
>> {
>>    VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>>    int ret;
>> 
>>    if (!s->started_vu) {
>> -        return;
>> +        return 0;
>>    }
>>    s->started_vu = false;
>> 
>>    if (!k->set_guest_notifiers) {
>> -        return;
>> +        return 0;
>>    }
>> 
>> -    vhost_dev_stop(&s->dev, vdev, true);
>> +    ret = vhost_dev_stop(&s->dev, vdev, true);
>> 
>> -    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>> -    if (ret < 0) {
>> +    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>>        error_report("vhost guest notifier cleanup failed: %d", ret);
>> -        return;
>> +        return -1;
>>    }
>> 
>>    vhost_dev_disable_notifiers(&s->dev, vdev);
>> +    return ret;
>> }
>> 
>> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>    VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>    bool should_start = virtio_device_should_start(vdev, status);
>> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>    int ret;
>> 
>>    if (!s->connected) {
>> -        return;
>> +        return -1;
>>    }
>> 
>>    if (vhost_dev_is_started(&s->dev) == should_start) {
>> -        return;
>> +        return 0;
>>    }
>> 
>>    if (should_start) {
>> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>            qemu_chr_fe_disconnect(&s->chardev);
>>        }
>>    } else {
>> -        vhost_user_blk_stop(vdev);
>> +        ret = vhost_user_blk_stop(vdev);
>> +        if (ret < 0) {
>> +            return ret;
>> +        }
>>    }
>> -
>> +    return 0;
>> }
>> 
>> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>>    vdc->get_config = vhost_user_blk_update_config;
>>    vdc->set_config = vhost_user_blk_set_config;
>>    vdc->get_features = vhost_user_blk_get_features;
>> -    vdc->set_status = vhost_user_blk_set_status;
>> +    vdc->set_status_ext = vhost_user_blk_set_status;
>>    vdc->reset = vhost_user_blk_reset;
>>    vdc->get_vhost = vhost_user_blk_get_vhost;
>> }
>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>> index 4c8637045d..43525ba46d 100644
>> --- a/hw/scsi/vhost-scsi-common.c
>> +++ b/hw/scsi/vhost-scsi-common.c
>> @@ -101,24 +101,25 @@ err_host_notifiers:
>>    return ret;
>> }
>> 
>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc)
>> {
>>    VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>>    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>>    int ret = 0;
>> 
>> -    vhost_dev_stop(&vsc->dev, vdev, true);
>> +    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>> 
>>    if (k->set_guest_notifiers) {
>> -        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>> -        if (ret < 0) {
>> -                error_report("vhost guest notifier cleanup failed: %d", ret);
>> +        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>> +        if (r < 0) {
>> +            error_report("vhost guest notifier cleanup failed: %d", ret);
> 
> The variable `ret` in the error_report() seems wrong.

Ohh, thanks, I will fix it later.

> 
>> +            return r;
>>        }
>>    }
>> -    assert(ret >= 0);
>> 
>>    vhost_dev_disable_notifiers(&vsc->dev, vdev);
>> +    return ret;
>> }
>> 
>> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index adb41b9816..8e7efc38f2 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
>>    return ret;
>> }
>> 
>> -static void vhost_user_scsi_stop(VHostUserSCSI *s)
>> +static int vhost_user_scsi_stop(VHostUserSCSI *s)
>> {
>>    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>> 
>>    if (!s->started_vu) {
>> -        return;
>> +        return 0;
>>    }
>>    s->started_vu = false;
>> 
>> -    vhost_scsi_common_stop(vsc);
>> +    return vhost_scsi_common_stop(vsc);
>> }
>> 
>> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>    VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>>    DeviceState *dev = DEVICE(vdev);
>> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>    int ret;
>> 
>>    if (!s->connected) {
>> -        return;
>> +        return -1;
>>    }
>> 
>>    if (vhost_dev_is_started(&vsc->dev) == should_start) {
>> -        return;
>> +        return 0;
>>    }
>> 
>>    if (should_start) {
>> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>            qemu_chr_fe_disconnect(&vs->conf.chardev);
>>        }
>>    } else {
>> -        vhost_user_scsi_stop(s);
>> +        ret = vhost_user_scsi_stop(s);
>> +        if (ret) {
>> +            return ret;
>> +        }
>>    }
>> +    return 0;
>> }
>> 
>> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
>>    vdc->unrealize = vhost_user_scsi_unrealize;
>>    vdc->get_features = vhost_scsi_common_get_features;
>>    vdc->set_config = vhost_scsi_common_set_config;
>> -    vdc->set_status = vhost_user_scsi_set_status;
>> +    vdc->set_status_ext = vhost_user_scsi_set_status;
>>    fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>>    vdc->reset = vhost_user_scsi_reset;
>>    vdc->get_vhost = vhost_user_scsi_get_vhost;
>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>> index 5e8d4cab53..fff7cdb35d 100644
>> --- a/hw/virtio/virtio.c
>> +++ b/hw/virtio/virtio.c
>> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>> {
>>    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>>    trace_virtio_set_status(vdev, val);
>> +    int ret = 0;
>> 
>>    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>>        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>>            val & VIRTIO_CONFIG_S_FEATURES_OK) {
>> -            int ret = virtio_validate_features(vdev);
>> -
>> +            ret = virtio_validate_features(vdev);
>>            if (ret) {
>>                return ret;
>>            }
>> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>>        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>>    }
>> 
>> -    if (k->set_status) {
>> +    if (k->set_status_ext) {
>> +        ret = k->set_status_ext(vdev, val);
>> +        if (ret) {
>> +            qemu_log("set %s status to %d failed, old status: %d\n",
>> +                     vdev->name, val, vdev->status);
>> +        }
>> +    } else if (k->set_status) {
>>        k->set_status(vdev, val);
>>    }
>>    vdev->status = val;
>> 
>> -    return 0;
>> +    return ret;
>> }
>> 
>> static enum virtio_device_endian virtio_default_endian(void)
>> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool running, RunState state)
>>    }
>> 
>>    if (!backend_run) {
>> -        virtio_set_status(vdev, vdev->status);
>> +        // the return value was used for stopping VM during migration
> 
> Can you elaborate a bit this comment?

This comment is to explain that the return value is used to indicate that
the live migration of the stop vhost-user device failed cuz the lost
connection with backend.

> 
>> +        int ret = virtio_set_status(vdev, vdev->status);
>> +        if (ret) {
>> +            return ret;
>> +        }
>>    }
>>    return 0;
>> }
>> diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
>> index c5d2c09455..d54d9c916f 100644
>> --- a/include/hw/virtio/vhost-scsi-common.h
>> +++ b/include/hw/virtio/vhost-scsi-common.h
>> @@ -40,7 +40,7 @@ struct VHostSCSICommon {
>> };
>> 
>> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc);
>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc);
>> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>>                                        DeviceState *dev);
>> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>> index 6386910280..c99d56f519 100644
>> --- a/include/hw/virtio/virtio.h
>> +++ b/include/hw/virtio/virtio.h
>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>    void (*reset)(VirtIODevice *vdev);
>>    void (*set_status)(VirtIODevice *vdev, uint8_t val);
>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
> 
> Why we need a new callback instead having `set_status` returning int ?

Because there are other devices such as virtio-net, virtio-ballon, etc.,
we only focus on vhost-user-blk/scsi when live migration.

> 
> Thanks,
> Stefano
> 
>>    /* Device must validate queue_index.  */
>>    void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
>>    /* Device must validate queue_index.  */
>> -- 
>> 2.48.1
Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Stefano Garzarella 1 week, 3 days ago
On Thu, Mar 20, 2025 at 08:21:30PM +0800, Haoqian He wrote:
>
>
>> 2025年3月19日 23:20,Stefano Garzarella <sgarzare@redhat.com> 写道:
>>
>> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:
>>> Live migration should be terminated if the backend crashes before
>>> the migration completes.
>>>
>>> Since the vhost device will be stopped when VM is stopped before
>>> the end of the live migration, current implementation if vhost
>>> backend died, vhost device's set_status() will not return failure,
>>> live migration won't perceive the disconnection between qemu and
>>> vhost backend, inflight io would be submitted in migration target
>>> host, leading to IO error.
>>>
>>> To fix this issue:
>>> 1. Add set_status_ext() which has return value for
>>> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>>>
>>> 2. In set_status_ext(), return failure if the flag `connected`
>>> is false or vhost_dev_stop return failure, which means qemu lost
>>> connection with backend.
>>>
>>> Hence migration_completion() will process failure, terminate
>>> migration and restore VM.
>>>
>>> Signed-off-by: Haoqian He <haoqian.he@smartx.com>
>>> ---
>>> hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
>>> hw/scsi/vhost-scsi-common.c           | 13 ++++++------
>>> hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
>>> hw/virtio/virtio.c                    | 20 +++++++++++++-----
>>> include/hw/virtio/vhost-scsi-common.h |  2 +-
>>> include/hw/virtio/virtio.h            |  1 +
>>> 6 files changed, 52 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>>> index ae42327cf8..4865786c54 100644
>>> --- a/hw/block/vhost-user-blk.c
>>> +++ b/hw/block/vhost-user-blk.c
>>> @@ -204,7 +204,7 @@ err_host_notifiers:
>>>    return ret;
>>> }
>>>
>>> -static void vhost_user_blk_stop(VirtIODevice *vdev)
>>> +static int vhost_user_blk_stop(VirtIODevice *vdev)
>>> {
>>>    VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>>    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>>>    int ret;
>>>
>>>    if (!s->started_vu) {
>>> -        return;
>>> +        return 0;
>>>    }
>>>    s->started_vu = false;
>>>
>>>    if (!k->set_guest_notifiers) {
>>> -        return;
>>> +        return 0;
>>>    }
>>>
>>> -    vhost_dev_stop(&s->dev, vdev, true);
>>> +    ret = vhost_dev_stop(&s->dev, vdev, true);
>>>
>>> -    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>>> -    if (ret < 0) {
>>> +    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>>>        error_report("vhost guest notifier cleanup failed: %d", ret);
>>> -        return;
>>> +        return -1;
>>>    }
>>>
>>>    vhost_dev_disable_notifiers(&s->dev, vdev);
>>> +    return ret;
>>> }
>>>
>>> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>> {
>>>    VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>>    bool should_start = virtio_device_should_start(vdev, status);
>>> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>    int ret;
>>>
>>>    if (!s->connected) {
>>> -        return;
>>> +        return -1;
>>>    }
>>>
>>>    if (vhost_dev_is_started(&s->dev) == should_start) {
>>> -        return;
>>> +        return 0;
>>>    }
>>>
>>>    if (should_start) {
>>> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>            qemu_chr_fe_disconnect(&s->chardev);
>>>        }
>>>    } else {
>>> -        vhost_user_blk_stop(vdev);
>>> +        ret = vhost_user_blk_stop(vdev);
>>> +        if (ret < 0) {
>>> +            return ret;
>>> +        }
>>>    }
>>> -
>>> +    return 0;
>>> }
>>>
>>> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>>> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>>>    vdc->get_config = vhost_user_blk_update_config;
>>>    vdc->set_config = vhost_user_blk_set_config;
>>>    vdc->get_features = vhost_user_blk_get_features;
>>> -    vdc->set_status = vhost_user_blk_set_status;
>>> +    vdc->set_status_ext = vhost_user_blk_set_status;
>>>    vdc->reset = vhost_user_blk_reset;
>>>    vdc->get_vhost = vhost_user_blk_get_vhost;
>>> }
>>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>>> index 4c8637045d..43525ba46d 100644
>>> --- a/hw/scsi/vhost-scsi-common.c
>>> +++ b/hw/scsi/vhost-scsi-common.c
>>> @@ -101,24 +101,25 @@ err_host_notifiers:
>>>    return ret;
>>> }
>>>
>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>> {
>>>    VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>>>    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>>    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>>>    int ret = 0;
>>>
>>> -    vhost_dev_stop(&vsc->dev, vdev, true);
>>> +    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>>>
>>>    if (k->set_guest_notifiers) {
>>> -        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>>> -        if (ret < 0) {
>>> -                error_report("vhost guest notifier cleanup failed: %d", ret);
>>> +        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>>> +        if (r < 0) {
>>> +            error_report("vhost guest notifier cleanup failed: %d", ret);
>>
>> The variable `ret` in the error_report() seems wrong.
>
>Ohh, thanks, I will fix it later.
>
>>
>>> +            return r;
>>>        }
>>>    }
>>> -    assert(ret >= 0);
>>>
>>>    vhost_dev_disable_notifiers(&vsc->dev, vdev);
>>> +    return ret;
>>> }
>>>
>>> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
>>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>>> index adb41b9816..8e7efc38f2 100644
>>> --- a/hw/scsi/vhost-user-scsi.c
>>> +++ b/hw/scsi/vhost-user-scsi.c
>>> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
>>>    return ret;
>>> }
>>>
>>> -static void vhost_user_scsi_stop(VHostUserSCSI *s)
>>> +static int vhost_user_scsi_stop(VHostUserSCSI *s)
>>> {
>>>    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>>>
>>>    if (!s->started_vu) {
>>> -        return;
>>> +        return 0;
>>>    }
>>>    s->started_vu = false;
>>>
>>> -    vhost_scsi_common_stop(vsc);
>>> +    return vhost_scsi_common_stop(vsc);
>>> }
>>>
>>> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>> {
>>>    VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>>>    DeviceState *dev = DEVICE(vdev);
>>> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>    int ret;
>>>
>>>    if (!s->connected) {
>>> -        return;
>>> +        return -1;
>>>    }
>>>
>>>    if (vhost_dev_is_started(&vsc->dev) == should_start) {
>>> -        return;
>>> +        return 0;
>>>    }
>>>
>>>    if (should_start) {
>>> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>            qemu_chr_fe_disconnect(&vs->conf.chardev);
>>>        }
>>>    } else {
>>> -        vhost_user_scsi_stop(s);
>>> +        ret = vhost_user_scsi_stop(s);
>>> +        if (ret) {
>>> +            return ret;
>>> +        }
>>>    }
>>> +    return 0;
>>> }
>>>
>>> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>>> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
>>>    vdc->unrealize = vhost_user_scsi_unrealize;
>>>    vdc->get_features = vhost_scsi_common_get_features;
>>>    vdc->set_config = vhost_scsi_common_set_config;
>>> -    vdc->set_status = vhost_user_scsi_set_status;
>>> +    vdc->set_status_ext = vhost_user_scsi_set_status;
>>>    fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>>>    vdc->reset = vhost_user_scsi_reset;
>>>    vdc->get_vhost = vhost_user_scsi_get_vhost;
>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>>> index 5e8d4cab53..fff7cdb35d 100644
>>> --- a/hw/virtio/virtio.c
>>> +++ b/hw/virtio/virtio.c
>>> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>>> {
>>>    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>>>    trace_virtio_set_status(vdev, val);
>>> +    int ret = 0;
>>>
>>>    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>>>        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>>>            val & VIRTIO_CONFIG_S_FEATURES_OK) {
>>> -            int ret = virtio_validate_features(vdev);
>>> -
>>> +            ret = virtio_validate_features(vdev);
>>>            if (ret) {
>>>                return ret;
>>>            }
>>> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>>>        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>>>    }
>>>
>>> -    if (k->set_status) {
>>> +    if (k->set_status_ext) {
>>> +        ret = k->set_status_ext(vdev, val);
>>> +        if (ret) {
>>> +            qemu_log("set %s status to %d failed, old status: %d\n",
>>> +                     vdev->name, val, vdev->status);
>>> +        }
>>> +    } else if (k->set_status) {
>>>        k->set_status(vdev, val);
>>>    }
>>>    vdev->status = val;
>>>
>>> -    return 0;
>>> +    return ret;
>>> }
>>>
>>> static enum virtio_device_endian virtio_default_endian(void)
>>> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool running, RunState state)
>>>    }
>>>
>>>    if (!backend_run) {
>>> -        virtio_set_status(vdev, vdev->status);
>>> +        // the return value was used for stopping VM during migration
>>
>> Can you elaborate a bit this comment?
>
>This comment is to explain that the return value is used to indicate that
>the live migration of the stop vhost-user device failed cuz the lost
>connection with backend.
>
>>
>>> +        int ret = virtio_set_status(vdev, vdev->status);
>>> +        if (ret) {
>>> +            return ret;
>>> +        }
>>>    }
>>>    return 0;
>>> }
>>> diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
>>> index c5d2c09455..d54d9c916f 100644
>>> --- a/include/hw/virtio/vhost-scsi-common.h
>>> +++ b/include/hw/virtio/vhost-scsi-common.h
>>> @@ -40,7 +40,7 @@ struct VHostSCSICommon {
>>> };
>>>
>>> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc);
>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc);
>>> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>>>                                        DeviceState *dev);
>>> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
>>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>>> index 6386910280..c99d56f519 100644
>>> --- a/include/hw/virtio/virtio.h
>>> +++ b/include/hw/virtio/virtio.h
>>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>>    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>>    void (*reset)(VirtIODevice *vdev);
>>>    void (*set_status)(VirtIODevice *vdev, uint8_t val);
>>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>>
>> Why we need a new callback instead having `set_status` returning int ?
>
>Because there are other devices such as virtio-net, virtio-ballon, etc.,
>we only focus on vhost-user-blk/scsi when live migration.

Why only them?

What I mean, is why in devices where it's not important, don't we just 
return 0?
It seems more complicated to maintain and confusing for new devices to 
have 2 callbacks for the same thing.

Stefano


Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Haoqian He 1 week, 2 days ago
> 2025年3月24日 22:31,Stefano Garzarella <sgarzare@redhat.com> 写道:
> 
> On Thu, Mar 20, 2025 at 08:21:30PM +0800, Haoqian He wrote:
>> 
>> 
>>> 2025年3月19日 23:20,Stefano Garzarella <sgarzare@redhat.com> 写道:
>>> 
>>> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:
>>>> Live migration should be terminated if the backend crashes before
>>>> the migration completes.
>>>> 
>>>> Since the vhost device will be stopped when VM is stopped before
>>>> the end of the live migration, current implementation if vhost
>>>> backend died, vhost device's set_status() will not return failure,
>>>> live migration won't perceive the disconnection between qemu and
>>>> vhost backend, inflight io would be submitted in migration target
>>>> host, leading to IO error.
>>>> 
>>>> To fix this issue:
>>>> 1. Add set_status_ext() which has return value for
>>>> VirtioDeviceClass and vhost-user-blk/scsi use the _ext version.
>>>> 
>>>> 2. In set_status_ext(), return failure if the flag `connected`
>>>> is false or vhost_dev_stop return failure, which means qemu lost
>>>> connection with backend.
>>>> 
>>>> Hence migration_completion() will process failure, terminate
>>>> migration and restore VM.
>>>> 
>>>> Signed-off-by: Haoqian He <haoqian.he@smartx.com>
>>>> ---
>>>> hw/block/vhost-user-blk.c             | 29 +++++++++++++++------------
>>>> hw/scsi/vhost-scsi-common.c           | 13 ++++++------
>>>> hw/scsi/vhost-user-scsi.c             | 20 ++++++++++--------
>>>> hw/virtio/virtio.c                    | 20 +++++++++++++-----
>>>> include/hw/virtio/vhost-scsi-common.h |  2 +-
>>>> include/hw/virtio/virtio.h            |  1 +
>>>> 6 files changed, 52 insertions(+), 33 deletions(-)
>>>> 
>>>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>>>> index ae42327cf8..4865786c54 100644
>>>> --- a/hw/block/vhost-user-blk.c
>>>> +++ b/hw/block/vhost-user-blk.c
>>>> @@ -204,7 +204,7 @@ err_host_notifiers:
>>>> return ret;
>>>> }
>>>> 
>>>> -static void vhost_user_blk_stop(VirtIODevice *vdev)
>>>> +static int vhost_user_blk_stop(VirtIODevice *vdev)
>>>> {
>>>> VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>>> BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>>> @@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>>>> int ret;
>>>> 
>>>> if (!s->started_vu) {
>>>> -        return;
>>>> +        return 0;
>>>> }
>>>> s->started_vu = false;
>>>> 
>>>> if (!k->set_guest_notifiers) {
>>>> -        return;
>>>> +        return 0;
>>>> }
>>>> 
>>>> -    vhost_dev_stop(&s->dev, vdev, true);
>>>> +    ret = vhost_dev_stop(&s->dev, vdev, true);
>>>> 
>>>> -    ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>>>> -    if (ret < 0) {
>>>> +    if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
>>>>    error_report("vhost guest notifier cleanup failed: %d", ret);
>>>> -        return;
>>>> +        return -1;
>>>> }
>>>> 
>>>> vhost_dev_disable_notifiers(&s->dev, vdev);
>>>> +    return ret;
>>>> }
>>>> 
>>>> -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>> +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>> {
>>>> VHostUserBlk *s = VHOST_USER_BLK(vdev);
>>>> bool should_start = virtio_device_should_start(vdev, status);
>>>> @@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>> int ret;
>>>> 
>>>> if (!s->connected) {
>>>> -        return;
>>>> +        return -1;
>>>> }
>>>> 
>>>> if (vhost_dev_is_started(&s->dev) == should_start) {
>>>> -        return;
>>>> +        return 0;
>>>> }
>>>> 
>>>> if (should_start) {
>>>> @@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>>>>        qemu_chr_fe_disconnect(&s->chardev);
>>>>    }
>>>> } else {
>>>> -        vhost_user_blk_stop(vdev);
>>>> +        ret = vhost_user_blk_stop(vdev);
>>>> +        if (ret < 0) {
>>>> +            return ret;
>>>> +        }
>>>> }
>>>> -
>>>> +    return 0;
>>>> }
>>>> 
>>>> static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
>>>> @@ -597,7 +600,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
>>>> vdc->get_config = vhost_user_blk_update_config;
>>>> vdc->set_config = vhost_user_blk_set_config;
>>>> vdc->get_features = vhost_user_blk_get_features;
>>>> -    vdc->set_status = vhost_user_blk_set_status;
>>>> +    vdc->set_status_ext = vhost_user_blk_set_status;
>>>> vdc->reset = vhost_user_blk_reset;
>>>> vdc->get_vhost = vhost_user_blk_get_vhost;
>>>> }
>>>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
>>>> index 4c8637045d..43525ba46d 100644
>>>> --- a/hw/scsi/vhost-scsi-common.c
>>>> +++ b/hw/scsi/vhost-scsi-common.c
>>>> @@ -101,24 +101,25 @@ err_host_notifiers:
>>>> return ret;
>>>> }
>>>> 
>>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc)
>>>> {
>>>> VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
>>>> BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>>>> VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>>>> int ret = 0;
>>>> 
>>>> -    vhost_dev_stop(&vsc->dev, vdev, true);
>>>> +    ret = vhost_dev_stop(&vsc->dev, vdev, true);
>>>> 
>>>> if (k->set_guest_notifiers) {
>>>> -        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>>>> -        if (ret < 0) {
>>>> -                error_report("vhost guest notifier cleanup failed: %d", ret);
>>>> +        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
>>>> +        if (r < 0) {
>>>> +            error_report("vhost guest notifier cleanup failed: %d", ret);
>>> 
>>> The variable `ret` in the error_report() seems wrong.
>> 
>> Ohh, thanks, I will fix it later.
>> 
>>> 
>>>> +            return r;
>>>>    }
>>>> }
>>>> -    assert(ret >= 0);
>>>> 
>>>> vhost_dev_disable_notifiers(&vsc->dev, vdev);
>>>> +    return ret;
>>>> }
>>>> 
>>>> uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
>>>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>>>> index adb41b9816..8e7efc38f2 100644
>>>> --- a/hw/scsi/vhost-user-scsi.c
>>>> +++ b/hw/scsi/vhost-user-scsi.c
>>>> @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
>>>> return ret;
>>>> }
>>>> 
>>>> -static void vhost_user_scsi_stop(VHostUserSCSI *s)
>>>> +static int vhost_user_scsi_stop(VHostUserSCSI *s)
>>>> {
>>>> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
>>>> 
>>>> if (!s->started_vu) {
>>>> -        return;
>>>> +        return 0;
>>>> }
>>>> s->started_vu = false;
>>>> 
>>>> -    vhost_scsi_common_stop(vsc);
>>>> +    return vhost_scsi_common_stop(vsc);
>>>> }
>>>> 
>>>> -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>> +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>> {
>>>> VHostUserSCSI *s = (VHostUserSCSI *)vdev;
>>>> DeviceState *dev = DEVICE(vdev);
>>>> @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>> int ret;
>>>> 
>>>> if (!s->connected) {
>>>> -        return;
>>>> +        return -1;
>>>> }
>>>> 
>>>> if (vhost_dev_is_started(&vsc->dev) == should_start) {
>>>> -        return;
>>>> +        return 0;
>>>> }
>>>> 
>>>> if (should_start) {
>>>> @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>>>>        qemu_chr_fe_disconnect(&vs->conf.chardev);
>>>>    }
>>>> } else {
>>>> -        vhost_user_scsi_stop(s);
>>>> +        ret = vhost_user_scsi_stop(s);
>>>> +        if (ret) {
>>>> +            return ret;
>>>> +        }
>>>> }
>>>> +    return 0;
>>>> }
>>>> 
>>>> static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>>>> @@ -399,7 +403,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
>>>> vdc->unrealize = vhost_user_scsi_unrealize;
>>>> vdc->get_features = vhost_scsi_common_get_features;
>>>> vdc->set_config = vhost_scsi_common_set_config;
>>>> -    vdc->set_status = vhost_user_scsi_set_status;
>>>> +    vdc->set_status_ext = vhost_user_scsi_set_status;
>>>> fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
>>>> vdc->reset = vhost_user_scsi_reset;
>>>> vdc->get_vhost = vhost_user_scsi_get_vhost;
>>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>>>> index 5e8d4cab53..fff7cdb35d 100644
>>>> --- a/hw/virtio/virtio.c
>>>> +++ b/hw/virtio/virtio.c
>>>> @@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>>>> {
>>>> VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>>>> trace_virtio_set_status(vdev, val);
>>>> +    int ret = 0;
>>>> 
>>>> if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>>>>    if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
>>>>        val & VIRTIO_CONFIG_S_FEATURES_OK) {
>>>> -            int ret = virtio_validate_features(vdev);
>>>> -
>>>> +            ret = virtio_validate_features(vdev);
>>>>        if (ret) {
>>>>            return ret;
>>>>        }
>>>> @@ -2238,12 +2238,18 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>>>>    virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>>>> }
>>>> 
>>>> -    if (k->set_status) {
>>>> +    if (k->set_status_ext) {
>>>> +        ret = k->set_status_ext(vdev, val);
>>>> +        if (ret) {
>>>> +            qemu_log("set %s status to %d failed, old status: %d\n",
>>>> +                     vdev->name, val, vdev->status);
>>>> +        }
>>>> +    } else if (k->set_status) {
>>>>    k->set_status(vdev, val);
>>>> }
>>>> vdev->status = val;
>>>> 
>>>> -    return 0;
>>>> +    return ret;
>>>> }
>>>> 
>>>> static enum virtio_device_endian virtio_default_endian(void)
>>>> @@ -3436,7 +3442,11 @@ static int virtio_vmstate_change(void *opaque, bool running, RunState state)
>>>> }
>>>> 
>>>> if (!backend_run) {
>>>> -        virtio_set_status(vdev, vdev->status);
>>>> +        // the return value was used for stopping VM during migration
>>> 
>>> Can you elaborate a bit this comment?
>> 
>> This comment is to explain that the return value is used to indicate that
>> the live migration of the stop vhost-user device failed cuz the lost
>> connection with backend.
>> 
>>> 
>>>> +        int ret = virtio_set_status(vdev, vdev->status);
>>>> +        if (ret) {
>>>> +            return ret;
>>>> +        }
>>>> }
>>>> return 0;
>>>> }
>>>> diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
>>>> index c5d2c09455..d54d9c916f 100644
>>>> --- a/include/hw/virtio/vhost-scsi-common.h
>>>> +++ b/include/hw/virtio/vhost-scsi-common.h
>>>> @@ -40,7 +40,7 @@ struct VHostSCSICommon {
>>>> };
>>>> 
>>>> int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
>>>> -void vhost_scsi_common_stop(VHostSCSICommon *vsc);
>>>> +int vhost_scsi_common_stop(VHostSCSICommon *vsc);
>>>> char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
>>>>                                    DeviceState *dev);
>>>> void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
>>>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>>>> index 6386910280..c99d56f519 100644
>>>> --- a/include/hw/virtio/virtio.h
>>>> +++ b/include/hw/virtio/virtio.h
>>>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>>> void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>>> void (*reset)(VirtIODevice *vdev);
>>>> void (*set_status)(VirtIODevice *vdev, uint8_t val);
>>>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>>> 
>>> Why we need a new callback instead having `set_status` returning int ?
>> 
>> Because there are other devices such as virtio-net, virtio-ballon, etc.,
>> we only focus on vhost-user-blk/scsi when live migration.
> 
> Why only them?
> 
> What I mean, is why in devices where it's not important, don't we just return 0?
> It seems more complicated to maintain and confusing for new devices to have 2 callbacks for the same thing.
> 
> Stefano

The series of these patches only want to fix that the inflight IO can't be
completed due to the disconnection between and the vhost-user backend for
vhost-user-blk / scsi devices during live migration. For other virito devices
the issue does not exist, and `vm_state_notify` cannot distinguish specific
devices, it's better not to return error.

I try to list the virtio sub-devices as follows:

hw/virtio/virtio-iommu.c:    vdc->set_status = virtio_iommu_set_status;
hw/virtio/virtio-balloon.c:    vdc->set_status = virtio_balloon_set_status;
hw/virtio/virtio-rng.c:    vdc->set_status = virtio_rng_set_status;
hw/virtio/virtio-crypto.c:    vdc->set_status = virtio_crypto_set_status;
hw/virtio/vhost-vsock.c:    vdc->set_status = vhost_vsock_set_status;
hw/virtio/vhost-user-vsock.c:    vdc->set_status = vuv_set_status;
hw/virtio/vhost-user-scmi.c:    vdc->set_status = vu_scmi_set_status;
hw/virtio/vhost-user-fs.c:    vdc->set_status = vuf_set_status;
hw/virtio/vhost-user-base.c:    vdc->set_status = vub_set_status;
hw/virtio/vdpa-dev.c:    vdc->set_status = vhost_vdpa_device_set_status;
tests/qtest/libqos/virtio-pci.c:    .set_status = qvirtio_pci_set_status,
tests/qtest/libqos/virtio-pci-modern.c:    .set_status = set_status,
tests/qtest/libqos/virtio-mmio.c:    .set_status = qvirtio_mmio_set_status,
hw/scsi/vhost-user-scsi.c:    vdc->set_status = vhost_user_scsi_set_status;
hw/scsi/vhost-scsi.c:    vdc->set_status = vhost_scsi_set_status;
hw/net/virtio-net.c:    vdc->set_status = virtio_net_set_status;
hw/char/virtio-serial-bus.c:    vdc->set_status = set_status;
hw/block/vhost-user-blk.c:    vdc->set_status = vhost_user_blk_set_status;
hw/block/virtio-blk.c:    vdc->set_status = virtio_blk_set_status;

If the new function pointer type is not added, the number of functions affected
will be very huge. Although it may seem a bit complicated to use two callbacks,
it's much safer.

Thanks,
Haoqian
Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Stefano Garzarella 1 week, 2 days ago
On Tue, Mar 25, 2025 at 04:39:46PM +0800, Haoqian He wrote:
>> 2025年3月24日 22:31,Stefano Garzarella <sgarzare@redhat.com> 写道:
>> On Thu, Mar 20, 2025 at 08:21:30PM +0800, Haoqian He wrote:
>>>> 2025年3月19日 23:20,Stefano Garzarella <sgarzare@redhat.com> 
>>>> 写道:
>>>> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:

[...]

>>>>> diff --git a/include/hw/virtio/virtio.h 
>>>>> b/include/hw/virtio/virtio.h
>>>>> index 6386910280..c99d56f519 100644
>>>>> --- a/include/hw/virtio/virtio.h
>>>>> +++ b/include/hw/virtio/virtio.h
>>>>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>>>> void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>>>> void (*reset)(VirtIODevice *vdev);
>>>>> void (*set_status)(VirtIODevice *vdev, uint8_t val);
>>>>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>>>>
>>>> Why we need a new callback instead having `set_status` returning int ?
>>>
>>> Because there are other devices such as virtio-net, virtio-ballon, etc.,
>>> we only focus on vhost-user-blk/scsi when live migration.
>>
>> Why only them?
>>
>> What I mean, is why in devices where it's not important, don't we just return 0?
>> It seems more complicated to maintain and confusing for new devices to have 2 callbacks for the same thing.
>>
>> Stefano
>
>The series of these patches only want to fix that the inflight IO can't be
>completed due to the disconnection between and the vhost-user backend for
>vhost-user-blk / scsi devices during live migration. For other virito devices
>the issue does not exist, and `vm_state_notify` cannot distinguish specific
>devices, it's better not to return error.

Why for example for vhost-user-fs it doesn't exist?

>
>I try to list the virtio sub-devices as follows:
>
>hw/virtio/virtio-iommu.c:    vdc->set_status = virtio_iommu_set_status;
>hw/virtio/virtio-balloon.c:    vdc->set_status = virtio_balloon_set_status;
>hw/virtio/virtio-rng.c:    vdc->set_status = virtio_rng_set_status;
>hw/virtio/virtio-crypto.c:    vdc->set_status = virtio_crypto_set_status;
>hw/virtio/vhost-vsock.c:    vdc->set_status = vhost_vsock_set_status;
>hw/virtio/vhost-user-vsock.c:    vdc->set_status = vuv_set_status;
>hw/virtio/vhost-user-scmi.c:    vdc->set_status = vu_scmi_set_status;
>hw/virtio/vhost-user-fs.c:    vdc->set_status = vuf_set_status;
>hw/virtio/vhost-user-base.c:    vdc->set_status = vub_set_status;
>hw/virtio/vdpa-dev.c:    vdc->set_status = vhost_vdpa_device_set_status;
>tests/qtest/libqos/virtio-pci.c:    .set_status = qvirtio_pci_set_status,
>tests/qtest/libqos/virtio-pci-modern.c:    .set_status = set_status,
>tests/qtest/libqos/virtio-mmio.c:    .set_status = qvirtio_mmio_set_status,
>hw/scsi/vhost-user-scsi.c:    vdc->set_status = vhost_user_scsi_set_status;
>hw/scsi/vhost-scsi.c:    vdc->set_status = vhost_scsi_set_status;
>hw/net/virtio-net.c:    vdc->set_status = virtio_net_set_status;
>hw/char/virtio-serial-bus.c:    vdc->set_status = set_status;
>hw/block/vhost-user-blk.c:    vdc->set_status = vhost_user_blk_set_status;
>hw/block/virtio-blk.c:    vdc->set_status = virtio_blk_set_status;
>
>If the new function pointer type is not added, the number of functions affected
>will be very huge. Although it may seem a bit complicated to use two callbacks,
>it's much safer.

I can understand that it requires more change, but I don't understand 
why it's safer, can you elaborate?

Anyway let's see what Michael says, if it's okay for him to have 2 
callbacks for the same thing but differing only by the return value, no 
objection for me.

Thanks,
Stefano


Re: [PATCH v2 3/3] vhost-user: return failure if backend crash when live migration
Posted by Haoqian He 1 week ago
> 2025年3月25日 17:51,Stefano Garzarella <sgarzare@redhat.com> 写道:
> 
> On Tue, Mar 25, 2025 at 04:39:46PM +0800, Haoqian He wrote:
>>> 2025年3月24日 22:31,Stefano Garzarella <sgarzare@redhat.com> 写道:
>>> On Thu, Mar 20, 2025 at 08:21:30PM +0800, Haoqian He wrote:
>>>>> 2025年3月19日 23:20,Stefano Garzarella <sgarzare@redhat.com> 写道:
>>>>> On Fri, Mar 14, 2025 at 06:15:34AM -0400, Haoqian He wrote:
> 
> [...]
> 
>>>>>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
>>>>>> index 6386910280..c99d56f519 100644
>>>>>> --- a/include/hw/virtio/virtio.h
>>>>>> +++ b/include/hw/virtio/virtio.h
>>>>>> @@ -187,6 +187,7 @@ struct VirtioDeviceClass {
>>>>>> void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
>>>>>> void (*reset)(VirtIODevice *vdev);
>>>>>> void (*set_status)(VirtIODevice *vdev, uint8_t val);
>>>>>> +    int (*set_status_ext)(VirtIODevice *vdev, uint8_t val);
>>>>> 
>>>>> Why we need a new callback instead having `set_status` returning int ?
>>>> 
>>>> Because there are other devices such as virtio-net, virtio-ballon, etc.,
>>>> we only focus on vhost-user-blk/scsi when live migration.
>>> 
>>> Why only them?
>>> 
>>> What I mean, is why in devices where it's not important, don't we just return 0?
>>> It seems more complicated to maintain and confusing for new devices to have 2 callbacks for the same thing.
>>> 
>>> Stefano
>> 
>> The series of these patches only want to fix that the inflight IO can't be
>> completed due to the disconnection between and the vhost-user backend for
>> vhost-user-blk / scsi devices during live migration. For other virito devices
>> the issue does not exist, and `vm_state_notify` cannot distinguish specific
>> devices, it's better not to return error.
> 
> Why for example for vhost-user-fs it doesn't exist?
> 
>> 
>> I try to list the virtio sub-devices as follows:
>> 
>> hw/virtio/virtio-iommu.c:    vdc->set_status = virtio_iommu_set_status;
>> hw/virtio/virtio-balloon.c:    vdc->set_status = virtio_balloon_set_status;
>> hw/virtio/virtio-rng.c:    vdc->set_status = virtio_rng_set_status;
>> hw/virtio/virtio-crypto.c:    vdc->set_status = virtio_crypto_set_status;
>> hw/virtio/vhost-vsock.c:    vdc->set_status = vhost_vsock_set_status;
>> hw/virtio/vhost-user-vsock.c:    vdc->set_status = vuv_set_status;
>> hw/virtio/vhost-user-scmi.c:    vdc->set_status = vu_scmi_set_status;
>> hw/virtio/vhost-user-fs.c:    vdc->set_status = vuf_set_status;
>> hw/virtio/vhost-user-base.c:    vdc->set_status = vub_set_status;
>> hw/virtio/vdpa-dev.c:    vdc->set_status = vhost_vdpa_device_set_status;
>> tests/qtest/libqos/virtio-pci.c:    .set_status = qvirtio_pci_set_status,
>> tests/qtest/libqos/virtio-pci-modern.c:    .set_status = set_status,
>> tests/qtest/libqos/virtio-mmio.c:    .set_status = qvirtio_mmio_set_status,
>> hw/scsi/vhost-user-scsi.c:    vdc->set_status = vhost_user_scsi_set_status;
>> hw/scsi/vhost-scsi.c:    vdc->set_status = vhost_scsi_set_status;
>> hw/net/virtio-net.c:    vdc->set_status = virtio_net_set_status;
>> hw/char/virtio-serial-bus.c:    vdc->set_status = set_status;
>> hw/block/vhost-user-blk.c:    vdc->set_status = vhost_user_blk_set_status;
>> hw/block/virtio-blk.c:    vdc->set_status = virtio_blk_set_status;
>> 
>> If the new function pointer type is not added, the number of functions affected
>> will be very huge. Although it may seem a bit complicated to use two callbacks,
>> it's much safer.
> 
> I can understand that it requires more change, but I don't understand why it's safer, can you elaborate?
> 
> Anyway let's see what Michael says, if it's okay for him to have 2 callbacks for the same thing but differing only by the return value, no objection for me.
> 
> Thanks,
> Stefano
> 

Hi Stefano, I removed set_status_ext in patch v3, and only changed the return
type of set_status to int. The new changes were applied to all vhost-user
devices, and virtio returned 0 for other devices.

Could you please review patch v3 is reasonable?

Thanks,
Haoqian