The backend maybe crash when vhost_dev_stop and GET_VRING_BASE
would fail, we can return failure to indicate the connection
with the backend is broken.
Signed-off-by: Haoqian He <haoqian.he@smartx.com>
---
hw/virtio/vhost.c | 27 +++++++++++++++------------
include/hw/virtio/vhost.h | 8 +++++---
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 6aa72fd434..c82bbbe4cc 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1368,23 +1368,23 @@ fail_alloc_desc:
return r;
}
-void vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+int vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
{
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
.index = vhost_vq_index,
};
- int r;
+ int r = 0;
if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
/* Don't stop the virtqueue which might have not been started */
- return;
+ return 0;
}
- r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
+ r |= dev->vhost_ops->vhost_get_vring_base(dev, &state);
if (r < 0) {
VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
/* Connection to the backend is broken, so let's sync internal
@@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
0, virtio_queue_get_avail_size(vdev, idx));
vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
0, virtio_queue_get_desc_size(vdev, idx));
+ return r;
}
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
@@ -2136,9 +2137,10 @@ fail_features:
}
/* Host notifiers must be enabled at this point. */
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
{
int i;
+ int rc = 0;
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
@@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
vhost_dev_set_vring_enable(hdev, false);
}
for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
+ rc |= vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
}
if (hdev->vhost_ops->vhost_reset_status) {
hdev->vhost_ops->vhost_reset_status(hdev);
@@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
hdev->started = false;
vdev->vhost_started = false;
hdev->vdev = NULL;
+ return rc;
}
int vhost_net_set_backend(struct vhost_dev *hdev,
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index a9469d50bc..fd96ec9c39 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
* Stop the vhost device. After the device is stopped the notifiers
* can be disabled (@vhost_dev_disable_notifiers) and the device can
* be torn down (@vhost_dev_cleanup).
+ *
+ * Return: 0 on success, != 0 on error when stopping dev.
*/
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
/**
* DOC: vhost device configuration handling
@@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
struct vhost_virtqueue *vq, unsigned idx);
-void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq, unsigned idx);
+int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq, unsigned idx);
void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
void vhost_dev_free_inflight(struct vhost_inflight *inflight);
--
2.48.1
On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: >The backend maybe crash when vhost_dev_stop and GET_VRING_BASE >would fail, we can return failure to indicate the connection >with the backend is broken. > >Signed-off-by: Haoqian He <haoqian.he@smartx.com> >--- > hw/virtio/vhost.c | 27 +++++++++++++++------------ > include/hw/virtio/vhost.h | 8 +++++--- > 2 files changed, 20 insertions(+), 15 deletions(-) > >diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >index 6aa72fd434..c82bbbe4cc 100644 >--- a/hw/virtio/vhost.c >+++ b/hw/virtio/vhost.c >@@ -1368,23 +1368,23 @@ fail_alloc_desc: > return r; > } > >-void vhost_virtqueue_stop(struct vhost_dev *dev, >- struct VirtIODevice *vdev, >- struct vhost_virtqueue *vq, >- unsigned idx) >+int vhost_virtqueue_stop(struct vhost_dev *dev, >+ struct VirtIODevice *vdev, >+ struct vhost_virtqueue *vq, >+ unsigned idx) > { > int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); > struct vhost_vring_state state = { > .index = vhost_vq_index, > }; >- int r; >+ int r = 0; > > if (virtio_queue_get_desc_addr(vdev, idx) == 0) { > /* Don't stop the virtqueue which might have not been started */ >- return; >+ return 0; > } > >- r = dev->vhost_ops->vhost_get_vring_base(dev, &state); >+ r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); We can avoid this and also initialize r to 0. > if (r < 0) { > VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); > /* Connection to the backend is broken, so let's sync internal >@@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, > 0, virtio_queue_get_avail_size(vdev, idx)); > vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), > 0, virtio_queue_get_desc_size(vdev, idx)); >+ return r; > } > > static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, >@@ -2136,9 +2137,10 @@ fail_features: > } > > /* Host notifiers must be enabled at this point. */ >-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) > { > int i; >+ int rc = 0; > > /* should only be called after backend is connected */ > assert(hdev->vhost_ops); >@@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) > vhost_dev_set_vring_enable(hdev, false); > } > for (i = 0; i < hdev->nvqs; ++i) { >- vhost_virtqueue_stop(hdev, >- vdev, >- hdev->vqs + i, >- hdev->vq_index + i); >+ rc |= vhost_virtqueue_stop(hdev, >+ vdev, >+ hdev->vqs + i, >+ hdev->vq_index + i); Also other function can fails, should we consider also them? (e.g. vhost_dev_start, vhost_dev_set_vring_enable, etc.) If not, why? > } > if (hdev->vhost_ops->vhost_reset_status) { > hdev->vhost_ops->vhost_reset_status(hdev); >@@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) > hdev->started = false; > vdev->vhost_started = false; > hdev->vdev = NULL; >+ return rc; > } > > int vhost_net_set_backend(struct vhost_dev *hdev, >diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >index a9469d50bc..fd96ec9c39 100644 >--- a/include/hw/virtio/vhost.h >+++ b/include/hw/virtio/vhost.h >@@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); > * Stop the vhost device. After the device is stopped the notifiers > * can be disabled (@vhost_dev_disable_notifiers) and the device can > * be torn down (@vhost_dev_cleanup). >+ * >+ * Return: 0 on success, != 0 on error when stopping dev. > */ >-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); > > /** > * DOC: vhost device configuration handling >@@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); > > int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, > struct vhost_virtqueue *vq, unsigned idx); >-void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >- struct vhost_virtqueue *vq, unsigned idx); >+int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >+ struct vhost_virtqueue *vq, unsigned idx); > > void vhost_dev_reset_inflight(struct vhost_inflight *inflight); > void vhost_dev_free_inflight(struct vhost_inflight *inflight); >-- >2.48.1 >
> 2025年3月19日 23:11,Stefano Garzarella <sgarzare@redhat.com> 写道: > > On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: >> The backend maybe crash when vhost_dev_stop and GET_VRING_BASE >> would fail, we can return failure to indicate the connection >> with the backend is broken. >> >> Signed-off-by: Haoqian He <haoqian.he@smartx.com> >> --- >> hw/virtio/vhost.c | 27 +++++++++++++++------------ >> include/hw/virtio/vhost.h | 8 +++++--- >> 2 files changed, 20 insertions(+), 15 deletions(-) >> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >> index 6aa72fd434..c82bbbe4cc 100644 >> --- a/hw/virtio/vhost.c >> +++ b/hw/virtio/vhost.c >> @@ -1368,23 +1368,23 @@ fail_alloc_desc: >> return r; >> } >> >> -void vhost_virtqueue_stop(struct vhost_dev *dev, >> - struct VirtIODevice *vdev, >> - struct vhost_virtqueue *vq, >> - unsigned idx) >> +int vhost_virtqueue_stop(struct vhost_dev *dev, >> + struct VirtIODevice *vdev, >> + struct vhost_virtqueue *vq, >> + unsigned idx) >> { >> int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); >> struct vhost_vring_state state = { >> .index = vhost_vq_index, >> }; >> - int r; >> + int r = 0; >> >> if (virtio_queue_get_desc_addr(vdev, idx) == 0) { >> /* Don't stop the virtqueue which might have not been started */ >> - return; >> + return 0; >> } >> >> - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); >> + r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); > > We can avoid this and also initialize r to 0. Here we need to do `vhost_virtqueue_stop` for each vq. > >> if (r < 0) { >> VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); >> /* Connection to the backend is broken, so let's sync internal >> @@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, >> 0, virtio_queue_get_avail_size(vdev, idx)); >> vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), >> 0, virtio_queue_get_desc_size(vdev, idx)); >> + return r; >> } >> >> static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, >> @@ -2136,9 +2137,10 @@ fail_features: >> } >> >> /* Host notifiers must be enabled at this point. */ >> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> { >> int i; >> + int rc = 0; >> >> /* should only be called after backend is connected */ >> assert(hdev->vhost_ops); >> @@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> vhost_dev_set_vring_enable(hdev, false); >> } >> for (i = 0; i < hdev->nvqs; ++i) { >> - vhost_virtqueue_stop(hdev, >> - vdev, >> - hdev->vqs + i, >> - hdev->vq_index + i); >> + rc |= vhost_virtqueue_stop(hdev, >> + vdev, >> + hdev->vqs + i, >> + hdev->vq_index + i); > > Also other function can fails, should we consider also them? > (e.g. , vhost_dev_set_vring_enable, etc.) > > If not, why? Since we only want to know the return value of callback when the stopping device live migration, there is no need to catch the failure of `vhost_dev_start`. We can also catch the failure of `vhost_dev_set_vring_enable`, because `vhost_dev_set_vring_enable` will also fail if qemu lost connection with the the backend, but I need to test it. > >> } >> if (hdev->vhost_ops->vhost_reset_status) { >> hdev->vhost_ops->vhost_reset_status(hdev); >> @@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >> hdev->started = false; >> vdev->vhost_started = false; >> hdev->vdev = NULL; >> + return rc; >> } >> >> int vhost_net_set_backend(struct vhost_dev *hdev, >> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >> index a9469d50bc..fd96ec9c39 100644 >> --- a/include/hw/virtio/vhost.h >> +++ b/include/hw/virtio/vhost.h >> @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >> * Stop the vhost device. After the device is stopped the notifiers >> * can be disabled (@vhost_dev_disable_notifiers) and the device can >> * be torn down (@vhost_dev_cleanup). >> + * >> + * Return: 0 on success, != 0 on error when stopping dev. >> */ >> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >> >> /** >> * DOC: vhost device configuration handling >> @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); >> >> int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, >> struct vhost_virtqueue *vq, unsigned idx); >> -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >> - struct vhost_virtqueue *vq, unsigned idx); >> +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >> + struct vhost_virtqueue *vq, unsigned idx); >> >> void vhost_dev_reset_inflight(struct vhost_inflight *inflight); >> void vhost_dev_free_inflight(struct vhost_inflight *inflight); >> -- >> 2.48.1
On Thu, Mar 20, 2025 at 08:21:25PM +0800, Haoqian He wrote: > >> 2025年3月19日 23:11,Stefano Garzarella <sgarzare@redhat.com> 写道: >> >> On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: >>> The backend maybe crash when vhost_dev_stop and GET_VRING_BASE >>> would fail, we can return failure to indicate the connection >>> with the backend is broken. >>> >>> Signed-off-by: Haoqian He <haoqian.he@smartx.com> >>> --- >>> hw/virtio/vhost.c | 27 +++++++++++++++------------ >>> include/hw/virtio/vhost.h | 8 +++++--- >>> 2 files changed, 20 insertions(+), 15 deletions(-) >>> >>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >>> index 6aa72fd434..c82bbbe4cc 100644 >>> --- a/hw/virtio/vhost.c >>> +++ b/hw/virtio/vhost.c >>> @@ -1368,23 +1368,23 @@ fail_alloc_desc: >>> return r; >>> } >>> >>> -void vhost_virtqueue_stop(struct vhost_dev *dev, >>> - struct VirtIODevice *vdev, >>> - struct vhost_virtqueue *vq, >>> - unsigned idx) >>> +int vhost_virtqueue_stop(struct vhost_dev *dev, >>> + struct VirtIODevice *vdev, >>> + struct vhost_virtqueue *vq, >>> + unsigned idx) >>> { >>> int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); >>> struct vhost_vring_state state = { >>> .index = vhost_vq_index, >>> }; >>> - int r; >>> + int r = 0; >>> >>> if (virtio_queue_get_desc_addr(vdev, idx) == 0) { >>> /* Don't stop the virtqueue which might have not been started */ >>> - return; >>> + return 0; >>> } >>> >>> - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); >>> + r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); >> >> We can avoid this and also initialize r to 0. > >Here we need to do `vhost_virtqueue_stop` for each vq. Sorry, my question is what's the point of initializing r to 0 and then putting it in or here with the result of vhost_get_vring_base? Can't we leave it as before and initialize it directly here? > >> >>> if (r < 0) { >>> VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); >>> /* Connection to the backend is broken, so let's sync internal >>> @@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, >>> 0, virtio_queue_get_avail_size(vdev, idx)); >>> vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), >>> 0, virtio_queue_get_desc_size(vdev, idx)); >>> + return r; >>> } >>> >>> static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, >>> @@ -2136,9 +2137,10 @@ fail_features: >>> } >>> >>> /* Host notifiers must be enabled at this point. */ >>> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >>> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >>> { >>> int i; >>> + int rc = 0; >>> >>> /* should only be called after backend is connected */ >>> assert(hdev->vhost_ops); >>> @@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >>> vhost_dev_set_vring_enable(hdev, false); >>> } >>> for (i = 0; i < hdev->nvqs; ++i) { >>> - vhost_virtqueue_stop(hdev, >>> - vdev, >>> - hdev->vqs + i, >>> - hdev->vq_index + i); >>> + rc |= vhost_virtqueue_stop(hdev, >>> + vdev, >>> + hdev->vqs + i, >>> + hdev->vq_index + i); >> >> Also other function can fails, should we consider also them? >> (e.g. , vhost_dev_set_vring_enable, etc.) >> >> If not, why? > >Since we only want to know the return value of callback when the >stopping device >live migration, there is no need to catch the failure of `vhost_dev_start`. Please add that in the commit message, and maybe also in a comment here. > >We can also catch the failure of `vhost_dev_set_vring_enable`, because >`vhost_dev_set_vring_enable` will also fail if qemu lost connection with the >the backend, but I need to test it. > Capturing failures of only some things is a little confusing to me, I think it needs to be better explained. Thanks, Stefano > >> >>> } >>> if (hdev->vhost_ops->vhost_reset_status) { >>> hdev->vhost_ops->vhost_reset_status(hdev); >>> @@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >>> hdev->started = false; >>> vdev->vhost_started = false; >>> hdev->vdev = NULL; >>> + return rc; >>> } >>> >>> int vhost_net_set_backend(struct vhost_dev *hdev, >>> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >>> index a9469d50bc..fd96ec9c39 100644 >>> --- a/include/hw/virtio/vhost.h >>> +++ b/include/hw/virtio/vhost.h >>> @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >>> * Stop the vhost device. After the device is stopped the notifiers >>> * can be disabled (@vhost_dev_disable_notifiers) and the device can >>> * be torn down (@vhost_dev_cleanup). >>> + * >>> + * Return: 0 on success, != 0 on error when stopping dev. >>> */ >>> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >>> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >>> >>> /** >>> * DOC: vhost device configuration handling >>> @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); >>> >>> int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, >>> struct vhost_virtqueue *vq, unsigned idx); >>> -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >>> - struct vhost_virtqueue *vq, unsigned idx); >>> +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >>> + struct vhost_virtqueue *vq, unsigned idx); >>> >>> void vhost_dev_reset_inflight(struct vhost_inflight *inflight); >>> void vhost_dev_free_inflight(struct vhost_inflight *inflight); >>> -- >>> 2.48.1 > >
2025年3月24日 22:25,Stefano Garzarella <sgarzare@redhat.com> 写道: On Thu, Mar 20, 2025 at 08:21:25PM +0800, Haoqian He wrote: 2025年3月19日 23:11,Stefano Garzarella <sgarzare@redhat.com> 写道: On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: The backend maybe crash when vhost_dev_stop and GET_VRING_BASE would fail, we can return failure to indicate the connection with the backend is broken. Signed-off-by: Haoqian He <haoqian.he@smartx.com> --- hw/virtio/vhost.c | 27 +++++++++++++++------------ include/hw/virtio/vhost.h | 8 +++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 6aa72fd434..c82bbbe4cc 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1368,23 +1368,23 @@ fail_alloc_desc: return r; } -void vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; - int r; + int r = 0; if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ - return; + return 0; } - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); + r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); We can avoid this and also initialize r to 0. Here we need to do `vhost_virtqueue_stop` for each vq. Sorry, my question is what's the point of initializing r to 0 and then putting it in or here with the result of vhost_get_vring_base? Can't we leave it as before and initialize it directly here? Acked. if (r < 0) { VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); /* Connection to the backend is broken, so let's sync internal @@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, 0, virtio_queue_get_avail_size(vdev, idx)); vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), 0, virtio_queue_get_desc_size(vdev, idx)); + return r; } static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, @@ -2136,9 +2137,10 @@ fail_features: } /* Host notifiers must be enabled at this point. */ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) { int i; + int rc = 0; /* should only be called after backend is connected */ assert(hdev->vhost_ops); @@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i); Also other function can fails, should we consider also them? (e.g. , vhost_dev_set_vring_enable, etc.) If not, why? Since we only want to know the return value of callback when the stopping device live migration, there is no need to catch the failure of `vhost_dev_start`. Please add that in the commit message, and maybe also in a comment here. We can also catch the failure of `vhost_dev_set_vring_enable`, because `vhost_dev_set_vring_enable` will also fail if qemu lost connection with the the backend, but I need to test it. Capturing failures of only some things is a little confusing to me, I think it needs to be better explained. Only capture vhost_virtqueue_stop's error based on the following considerations: 1. `vhost_dev_start` always return 0 when stopping, so there is no need to capture the error. 2. For `vhost_dev_set_vring_enable`/`vhost_reset_status`, it is necessary to satisfy that virtio has set certain feature bits (e.g. VHOST_USER_PROTOCOL_F_STATUS). Relatively speaking, `vhost_virtqueue_stop` is more universal, if stop virtiqueue fails, we will abort the live migration. 3. Even if we capture the error of any function, we cannot return directly, and we still need to execute the rest of `vhost_dev_stop`. Thanks, Haoqian Thanks, Stefano } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->started = false; vdev->vhost_started = false; hdev->vdev = NULL; + return rc; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index a9469d50bc..fd96ec9c39 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); * Stop the vhost device. After the device is stopped the notifiers * can be disabled (@vhost_dev_disable_notifiers) and the device can * be torn down (@vhost_dev_cleanup). + * + * Return: 0 on success, != 0 on error when stopping dev. */ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); /** * DOC: vhost device configuration handling @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, unsigned idx); -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, unsigned idx); +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, unsigned idx); void vhost_dev_reset_inflight(struct vhost_inflight *inflight); void vhost_dev_free_inflight(struct vhost_inflight *inflight); -- 2.48.1
On Tue, Mar 25, 2025 at 04:36:53PM +0800, Haoqian He wrote: >2025年3月24日 22:25,Stefano Garzarella <sgarzare@redhat.com> 写道: > >On Thu, Mar 20, 2025 at 08:21:25PM +0800, Haoqian He wrote: > > >2025年3月19日 23:11,Stefano Garzarella <sgarzare@redhat.com> 写道: > >On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote: > >The backend maybe crash when vhost_dev_stop and GET_VRING_BASE >would fail, we can return failure to indicate the connection >with the backend is broken. > >Signed-off-by: Haoqian He <haoqian.he@smartx.com> >--- >hw/virtio/vhost.c | 27 +++++++++++++++------------ >include/hw/virtio/vhost.h | 8 +++++--- >2 files changed, 20 insertions(+), 15 deletions(-) > >diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c >index 6aa72fd434..c82bbbe4cc 100644 >--- a/hw/virtio/vhost.c >+++ b/hw/virtio/vhost.c >@@ -1368,23 +1368,23 @@ fail_alloc_desc: >return r; >} > >-void vhost_virtqueue_stop(struct vhost_dev *dev, >- struct VirtIODevice *vdev, >- struct vhost_virtqueue *vq, >- unsigned idx) >+int vhost_virtqueue_stop(struct vhost_dev *dev, >+ struct VirtIODevice *vdev, >+ struct vhost_virtqueue *vq, >+ unsigned idx) >{ >int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); >struct vhost_vring_state state = { > .index = vhost_vq_index, >}; >- int r; >+ int r = 0; > >if (virtio_queue_get_desc_addr(vdev, idx) == 0) { > /* Don't stop the virtqueue which might have not been started */ >- return; >+ return 0; >} > >- r = dev->vhost_ops->vhost_get_vring_base(dev, &state); >+ r |= dev->vhost_ops->vhost_get_vring_base(dev, &state); > > >We can avoid this and also initialize r to 0. > > >Here we need to do `vhost_virtqueue_stop` for each vq. > > >Sorry, my question is what's the point of initializing r to 0 and then >putting it in or here with the result of vhost_get_vring_base? >Can't we leave it as before and initialize it directly here? > > >Acked. > > > > >if (r < 0) { > VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); > /* Connection to the backend is broken, so let's sync internal >@@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, > 0, virtio_queue_get_avail_size(vdev, idx)); >vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), > 0, virtio_queue_get_desc_size(vdev, idx)); >+ return r; >} > >static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, >@@ -2136,9 +2137,10 @@ fail_features: >} > >/* Host notifiers must be enabled at this point. */ >-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) >{ >int i; >+ int rc = 0; > >/* should only be called after backend is connected */ >assert(hdev->vhost_ops); >@@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, >VirtIODevice *vdev, bool vrings) > vhost_dev_set_vring_enable(hdev, false); >} >for (i = 0; i < hdev->nvqs; ++i) { >- vhost_virtqueue_stop(hdev, >- vdev, >- hdev->vqs + i, >- hdev->vq_index + i); >+ rc |= vhost_virtqueue_stop(hdev, >+ vdev, >+ hdev->vqs + i, >+ hdev->vq_index + i); > > >Also other function can fails, should we consider also them? >(e.g. , vhost_dev_set_vring_enable, etc.) > >If not, why? > > >Since we only want to know the return value of callback when the stopping device >live migration, there is no need to catch the failure of `vhost_dev_start`. > > >Please add that in the commit message, and maybe also in a comment here. > > >We can also catch the failure of `vhost_dev_set_vring_enable`, because >`vhost_dev_set_vring_enable` will also fail if qemu lost connection with the >the backend, but I need to test it. > > >Capturing failures of only some things is a little confusing to me, I >think it needs to be better explained. > > >Only capture vhost_virtqueue_stop's error based on the following considerations: > >1. `vhost_dev_start` always return 0 when stopping, so there is no need to >capture the error. > >2. For `vhost_dev_set_vring_enable`/`vhost_reset_status`, it is necessary to >satisfy that virtio has set certain feature bits (e.g. >VHOST_USER_PROTOCOL_F_STATUS). >Relatively speaking, `vhost_virtqueue_stop` is more universal, if stop >virtiqueue >fails, we will abort the live migration. > >3. Even if we capture the error of any function, we cannot return directly, and >we still need to execute the rest of `vhost_dev_stop`. > This is exactly the information that a commit message should contain, so I suggest you add this kind of information to each commit to simplify both review, but also in the future possible bisection for issues. Thanks, Stefano >} >if (hdev->vhost_ops->vhost_reset_status) { > hdev->vhost_ops->vhost_reset_status(hdev); >@@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, >VirtIODevice *vdev, bool vrings) >hdev->started = false; >vdev->vhost_started = false; >hdev->vdev = NULL; >+ return rc; >} > >int vhost_net_set_backend(struct vhost_dev *hdev, >diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >index a9469d50bc..fd96ec9c39 100644 >--- a/include/hw/virtio/vhost.h >+++ b/include/hw/virtio/vhost.h >@@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, >VirtIODevice *vdev, bool vrings); >* Stop the vhost device. After the device is stopped the notifiers >* can be disabled (@vhost_dev_disable_notifiers) and the device can >* be torn down (@vhost_dev_cleanup). >+ * >+ * Return: 0 on success, != 0 on error when stopping dev. >*/ >-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); >+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); > >/** >* DOC: vhost device configuration handling >@@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, >uint64_t iova, int write); > >int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, > struct vhost_virtqueue *vq, unsigned idx); >-void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >- struct vhost_virtqueue *vq, unsigned idx); >+int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, >+ struct vhost_virtqueue *vq, unsigned idx); > >void vhost_dev_reset_inflight(struct vhost_inflight *inflight); >void vhost_dev_free_inflight(struct vhost_inflight *inflight); >-- >2.48.1 >
© 2016 - 2025 Red Hat, Inc.