When vhost-user is reconnecting to the backend and fails at get_features
in vhost_dev_init(), the reconnect fails and is never retriggered
afterwards.
The reason is:
When vhost-user fails at get_features, vhost_dev_cleanup is called
immediately.
vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
The reconnect path is:
vhost_user_blk_event
vhost_user_async_close(.. vhost_user_blk_disconnect ..)
qemu_chr_fe_set_handlers <----- clear the notifier callback
schedule vhost_user_async_close_bh
Because vhost_dev_cleanup has zeroed the vhost_dev, vhost->vdev is NULL, so
vhost_user_blk_disconnect is not called and the event fd callback is never
reinstalled.
All vhost-user devices have this issue, including vhost-user-blk/scsi.
With this patch, if vhost->vdev is NULL, the fd callback will still
be reinstalled.
Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
Signed-off-by: Li Feng <fengli@smartx.com>
---
hw/block/vhost-user-blk.c | 2 +-
hw/scsi/vhost-user-scsi.c | 3 ++-
hw/virtio/vhost-user-gpio.c | 2 +-
hw/virtio/vhost-user.c | 10 ++++++++--
include/hw/virtio/vhost-user.h | 4 +++-
5 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 3c69fa47d5..95c758200d 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
case CHR_EVENT_CLOSED:
/* defer close until later to avoid circular close */
vhost_user_async_close(dev, &s->chardev, &s->dev,
- vhost_user_blk_disconnect);
+ vhost_user_blk_disconnect, vhost_user_blk_event);
break;
case CHR_EVENT_BREAK:
case CHR_EVENT_MUX_IN:
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index a7fa8e8df2..e931df9f5b 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -236,7 +236,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
case CHR_EVENT_CLOSED:
/* defer close until later to avoid circular close */
vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
- vhost_user_scsi_disconnect);
+ vhost_user_scsi_disconnect,
+ vhost_user_scsi_event);
break;
case CHR_EVENT_BREAK:
case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
index d9979aa5db..04c2cc79f4 100644
--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
@@ -283,7 +283,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event)
case CHR_EVENT_CLOSED:
/* defer close until later to avoid circular close */
vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
- vu_gpio_disconnect);
+ vu_gpio_disconnect, vu_gpio_event);
break;
case CHR_EVENT_BREAK:
case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8dcf049d42..9540766dd3 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2643,6 +2643,7 @@ typedef struct {
DeviceState *dev;
CharBackend *cd;
struct vhost_dev *vhost;
+ IOEventHandler *event_cb;
} VhostAsyncCallback;
static void vhost_user_async_close_bh(void *opaque)
@@ -2657,7 +2658,10 @@ static void vhost_user_async_close_bh(void *opaque)
*/
if (vhost->vdev) {
data->cb(data->dev);
- }
+ } else if (data->event_cb) {
+ qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
+ NULL, data->dev, NULL, true);
+ }
g_free(data);
}
@@ -2669,7 +2673,9 @@ static void vhost_user_async_close_bh(void *opaque)
*/
void vhost_user_async_close(DeviceState *d,
CharBackend *chardev, struct vhost_dev *vhost,
- vu_async_close_fn cb)
+ vu_async_close_fn cb,
+ IOEventHandler *event_cb
+ )
{
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
/*
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 191216a74f..5fdc711d4e 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -84,6 +84,8 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
void vhost_user_async_close(DeviceState *d,
CharBackend *chardev, struct vhost_dev *vhost,
- vu_async_close_fn cb);
+ vu_async_close_fn cb,
+ IOEventHandler *event_cb
+ );
#endif
--
2.41.0
> On Aug 24, 2023, at 3:41 AM, Li Feng <fengli@smartx.com> wrote:
>
> When the vhost-user is reconnecting to the backend, and if the vhost-user fails
> at the get_features in vhost_dev_init(), then the reconnect will fail
> and it will not be retriggered forever.
>
> The reason is:
> When the vhost-user fails at get_features, the vhost_dev_cleanup will be called
> immediately.
>
> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
>
> The reconnect path is:
> vhost_user_blk_event
> vhost_user_async_close(.. vhost_user_blk_disconnect ..)
> qemu_chr_fe_set_handlers <----- clear the notifier callback
> schedule vhost_user_async_close_bh
>
> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
> called, then the event fd callback will not be reinstalled.
>
> All vhost-user devices have this issue, including vhost-user-blk/scsi.
>
> With this patch, if the vdev->vdev is null, the fd callback will still
> be reinstalled.
>
> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
>
A couple of NITs, otherwise LGTM
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
> hw/block/vhost-user-blk.c | 2 +-
> hw/scsi/vhost-user-scsi.c | 3 ++-
> hw/virtio/vhost-user-gpio.c | 2 +-
> hw/virtio/vhost-user.c | 10 ++++++++--
> include/hw/virtio/vhost-user.h | 4 +++-
> 5 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 3c69fa47d5..95c758200d 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
> case CHR_EVENT_CLOSED:
> /* defer close until later to avoid circular close */
> vhost_user_async_close(dev, &s->chardev, &s->dev,
> - vhost_user_blk_disconnect);
> + vhost_user_blk_disconnect, vhost_user_blk_event);
> break;
> case CHR_EVENT_BREAK:
> case CHR_EVENT_MUX_IN:
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index a7fa8e8df2..e931df9f5b 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -236,7 +236,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
> case CHR_EVENT_CLOSED:
> /* defer close until later to avoid circular close */
> vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> - vhost_user_scsi_disconnect);
> + vhost_user_scsi_disconnect,
> + vhost_user_scsi_event);
> break;
> case CHR_EVENT_BREAK:
> case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
> index d9979aa5db..04c2cc79f4 100644
> --- a/hw/virtio/vhost-user-gpio.c
> +++ b/hw/virtio/vhost-user-gpio.c
> @@ -283,7 +283,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event)
> case CHR_EVENT_CLOSED:
> /* defer close until later to avoid circular close */
> vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
> - vu_gpio_disconnect);
> + vu_gpio_disconnect, vu_gpio_event);
> break;
> case CHR_EVENT_BREAK:
> case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 8dcf049d42..9540766dd3 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2643,6 +2643,7 @@ typedef struct {
> DeviceState *dev;
> CharBackend *cd;
> struct vhost_dev *vhost;
> + IOEventHandler *event_cb;
> } VhostAsyncCallback;
>
> static void vhost_user_async_close_bh(void *opaque)
> @@ -2657,7 +2658,10 @@ static void vhost_user_async_close_bh(void *opaque)
> */
> if (vhost->vdev) {
> data->cb(data->dev);
> - }
> + } else if (data->event_cb) {
> + qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
> + NULL, data->dev, NULL, true);
> + }
>
> g_free(data);
> }
> @@ -2669,7 +2673,9 @@ static void vhost_user_async_close_bh(void *opaque)
> */
> void vhost_user_async_close(DeviceState *d,
> CharBackend *chardev, struct vhost_dev *vhost,
> - vu_async_close_fn cb)
> + vu_async_close_fn cb,
> + IOEventHandler *event_cb
Nit: why the newline before the closing parenthesis?
> + )
> {
> if (!runstate_check(RUN_STATE_SHUTDOWN)) {
> /*
> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> index 191216a74f..5fdc711d4e 100644
> --- a/include/hw/virtio/vhost-user.h
> +++ b/include/hw/virtio/vhost-user.h
> @@ -84,6 +84,8 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
>
> void vhost_user_async_close(DeviceState *d,
> CharBackend *chardev, struct vhost_dev *vhost,
> - vu_async_close_fn cb);
> + vu_async_close_fn cb,
> + IOEventHandler *event_cb
Nit: ditto - don’t think we need this newline before );
> + );
>
> #endif
> --
> 2.41.0
>
> On 30 Aug 2023, at 6:11 AM, Raphael Norwitz <raphael.norwitz@nutanix.com> wrote:
>
>
>
>> On Aug 24, 2023, at 3:41 AM, Li Feng <fengli@smartx.com> wrote:
>>
>> When the vhost-user is reconnecting to the backend, and if the vhost-user fails
>> at the get_features in vhost_dev_init(), then the reconnect will fail
>> and it will not be retriggered forever.
>>
>> The reason is:
>> When the vhost-user fails at get_features, the vhost_dev_cleanup will be called
>> immediately.
>>
>> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
>>
>> The reconnect path is:
>> vhost_user_blk_event
>> vhost_user_async_close(.. vhost_user_blk_disconnect ..)
>> qemu_chr_fe_set_handlers <----- clear the notifier callback
>> schedule vhost_user_async_close_bh
>>
>> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
>> called, then the event fd callback will not be reinstalled.
>>
>> All vhost-user devices have this issue, including vhost-user-blk/scsi.
>>
>> With this patch, if the vdev->vdev is null, the fd callback will still
>> be reinstalled.
>>
>> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
>>
>
> A couple of NITs, otherwise LGTM
>
> Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com <mailto:raphael.norwitz@nutanix.com>>
>
>> Signed-off-by: Li Feng <fengli@smartx.com>
>> ---
>> hw/block/vhost-user-blk.c | 2 +-
>> hw/scsi/vhost-user-scsi.c | 3 ++-
>> hw/virtio/vhost-user-gpio.c | 2 +-
>> hw/virtio/vhost-user.c | 10 ++++++++--
>> include/hw/virtio/vhost-user.h | 4 +++-
>> 5 files changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index 3c69fa47d5..95c758200d 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>> case CHR_EVENT_CLOSED:
>> /* defer close until later to avoid circular close */
>> vhost_user_async_close(dev, &s->chardev, &s->dev,
>> - vhost_user_blk_disconnect);
>> + vhost_user_blk_disconnect, vhost_user_blk_event);
>> break;
>> case CHR_EVENT_BREAK:
>> case CHR_EVENT_MUX_IN:
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index a7fa8e8df2..e931df9f5b 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -236,7 +236,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
>> case CHR_EVENT_CLOSED:
>> /* defer close until later to avoid circular close */
>> vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
>> - vhost_user_scsi_disconnect);
>> + vhost_user_scsi_disconnect,
>> + vhost_user_scsi_event);
>> break;
>> case CHR_EVENT_BREAK:
>> case CHR_EVENT_MUX_IN:
>> diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
>> index d9979aa5db..04c2cc79f4 100644
>> --- a/hw/virtio/vhost-user-gpio.c
>> +++ b/hw/virtio/vhost-user-gpio.c
>> @@ -283,7 +283,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event)
>> case CHR_EVENT_CLOSED:
>> /* defer close until later to avoid circular close */
>> vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
>> - vu_gpio_disconnect);
>> + vu_gpio_disconnect, vu_gpio_event);
>> break;
>> case CHR_EVENT_BREAK:
>> case CHR_EVENT_MUX_IN:
>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> index 8dcf049d42..9540766dd3 100644
>> --- a/hw/virtio/vhost-user.c
>> +++ b/hw/virtio/vhost-user.c
>> @@ -2643,6 +2643,7 @@ typedef struct {
>> DeviceState *dev;
>> CharBackend *cd;
>> struct vhost_dev *vhost;
>> + IOEventHandler *event_cb;
>> } VhostAsyncCallback;
>>
>> static void vhost_user_async_close_bh(void *opaque)
>> @@ -2657,7 +2658,10 @@ static void vhost_user_async_close_bh(void *opaque)
>> */
>> if (vhost->vdev) {
>> data->cb(data->dev);
>> - }
>> + } else if (data->event_cb) {
>> + qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
>> + NULL, data->dev, NULL, true);
>> + }
>>
>> g_free(data);
>> }
>> @@ -2669,7 +2673,9 @@ static void vhost_user_async_close_bh(void *opaque)
>> */
>> void vhost_user_async_close(DeviceState *d,
>> CharBackend *chardev, struct vhost_dev *vhost,
>> - vu_async_close_fn cb)
>> + vu_async_close_fn cb,
>> + IOEventHandler *event_cb
>
> Nit: why the newline before the closing parenthesis?
Acked.
>
>> + )
>> {
>> if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>> /*
>> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
>> index 191216a74f..5fdc711d4e 100644
>> --- a/include/hw/virtio/vhost-user.h
>> +++ b/include/hw/virtio/vhost-user.h
>> @@ -84,6 +84,8 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
>>
>> void vhost_user_async_close(DeviceState *d,
>> CharBackend *chardev, struct vhost_dev *vhost,
>> - vu_async_close_fn cb);
>> + vu_async_close_fn cb,
>> + IOEventHandler *event_cb
>
> Nit: ditto - don’t think we need this newline before );
Acked.
>
>> + );
>>
>> #endif
>> --
>> 2.41.0
© 2016 - 2026 Red Hat, Inc.