[PATCH] vhost-user: use new thread to loop backend channel.

Rma Ma posted 1 patch 10 months, 3 weeks ago
Patches applied successfully
git fetch https://github.com/patchew-project/qemu tags/patchew/20230629065415.8211-1-rma.ma@jaguarmicro.com
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>
hw/virtio/vhost-user.c | 67 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 61 insertions(+), 6 deletions(-)
[PATCH] vhost-user: use new thread to loop backend channel.
Posted by Rma Ma 10 months, 3 weeks ago
fix: QEMU deadlock with dpdk-vdpa

When QEMU starts vhost-user with modern net and blk devices and the
backend is a dpdk-vdpa process, the destination QEMU can deadlock with
dpdk-vdpa after live migration:

- QEMU sends VHOST_USER_SET_VRING_KICK to the dpdk-vdpa net device
- QEMU does not need to wait for a response to this message
- QEMU then sends VHOST_USER_SET_MEM_TABLE to the dpdk-vdpa blk device
- QEMU must wait for a reply to this message
- when dpdk-vdpa receives VHOST_USER_SET_VRING_KICK, it sends
  VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG to QEMU
- dpdk-vdpa then waits for a response to that message
- since QEMU's vhost_user_read and the backend channel are serviced
  synchronously in the same thread, neither side can make progress
- QEMU deadlocks with dpdk-vdpa (sketched below)
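
A minimal sketch of the blocking pattern, in simplified pseudo-C (the
call sequence is illustrative, not the exact QEMU code path):

    /* Main thread: both the master socket and the backend channel
     * are serviced here. */
    vhost_user_set_mem_table(dev, mem);  /* send SET_MEM_TABLE ...     */
    vhost_user_read(dev, &reply);        /* ... and block waiting for
                                          * a reply that never comes.  */
    /*
     * dpdk-vdpa, meanwhile, has already sent
     * VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG on the backend
     * channel and blocks waiting for QEMU's ack.  backend_read() can
     * only be dispatched by the same (now blocked) main loop, so
     * neither process ever makes progress.
     */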

Signed-off-by: Rma Ma <rma.ma@jaguarmicro.com>
---
 hw/virtio/vhost-user.c | 67 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c 
index c4e0cbd702..1f6b3a5a63 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -274,6 +274,17 @@ struct scrub_regions {
     int fd_idx;
 };
 
+struct backend_thread {
+    QemuThread thread;
+    QemuSemaphore init_done_sem;
+    int thread_id;
+    GMainContext *ctxt;
+    GMainLoop *loop;
+};
+
+static struct backend_thread *backend_th;
+static bool backend_thread_run;
+
 static bool ioeventfd_enabled(void)
 {
     return !kvm_enabled() || kvm_eventfds_enabled();
@@ -1696,7 +1707,8 @@ fdcleanup:
     return rc;
 }
 
-static int vhost_setup_backend_channel(struct vhost_dev *dev)
+static int vhost_setup_backend_channel(struct vhost_dev *dev,
+                                       GMainContext *ctxt)
 {
     VhostUserMsg msg = {
         .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
@@ -1728,7 +1740,7 @@ static int vhost_setup_backend_channel(struct vhost_dev *dev)
     u->backend_ioc = ioc;
     u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
                                                 G_IO_IN | G_IO_HUP,
-                                                backend_read, dev, NULL, NULL);
+                                                backend_read, dev, NULL, ctxt);
 
     if (reply_supported) {
         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
@@ -1981,6 +1993,42 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
     return 0;
 }
 
+static void *vhost_backend_channel_worker(void *opaque)
+{
+    struct backend_thread *backend_th = opaque;
+
+    rcu_register_thread();
+
+    backend_th->ctxt = g_main_context_new();
+    backend_th->loop = g_main_loop_new(backend_th->ctxt, false);
+    backend_th->thread_id = qemu_get_thread_id();
+
+    qemu_sem_post(&backend_th->init_done_sem);
+
+    g_main_loop_run(backend_th->loop);
+
+    g_main_loop_unref(backend_th->loop);
+    g_main_context_unref(backend_th->ctxt);
+    g_free(backend_th);
+    rcu_unregister_thread();
+    return NULL;
+}
+
+static void vhost_backend_thread_init(void)
+{
+    backend_th = g_malloc0(sizeof(struct backend_thread));
+    backend_th->thread_id = -1;
+    qemu_sem_init(&backend_th->init_done_sem, 0);
+
+    qemu_thread_create(&backend_th->thread, "backend-channel-worker",
+                       vhost_backend_channel_worker, backend_th,
+                       QEMU_THREAD_DETACHED);
+
+    while (backend_th->thread_id == -1) {
+        qemu_sem_wait(&backend_th->init_done_sem);
+    }
+}
+
 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                    Error **errp)
 {
@@ -2108,10 +2156,17 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
     }
 
     if (dev->vq_index == 0) {
-        err = vhost_setup_backend_channel(dev);
-        if (err < 0) {
-            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
-            return -EPROTO;
+        if (!backend_thread_run) {
+            vhost_backend_thread_init();
+            backend_thread_run = true;
+        }
+
+        if (backend_thread_run && backend_th) {
+            err = vhost_setup_backend_channel(dev, backend_th->ctxt);
+            if (err < 0) {
+                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+                return -EPROTO;
+            }
         }
     }
 
-- 
2.17.1
Re: [PATCH] vhost-user: use new thread to loop backend channel.
Posted by Michael S. Tsirkin 10 months, 3 weeks ago
On Thu, Jun 29, 2023 at 02:54:15PM +0800, Rma Ma wrote:
> fix: QEMU deadlock with dpdk-vdpa
> 
> [...]
> 
> Signed-off-by: Rma Ma <rma.ma@jaguarmicro.com>

Hmm, this will need some thought. I'm concerned that this is making
what was previously simple synchronous code multithreaded.

Also, I feel dpdk needs to fix this too.

Let's document in the spec that when the backend is allowed to send
messages (VHOST_USER_PROTOCOL_F_BACKEND_REQ), neither side is allowed
to block processing incoming messages while waiting for a response.
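
A hedged sketch of what that rule implies for a backend: while waiting
for the reply to its own request, it keeps draining the master socket
instead of doing a blocking read for the reply alone (helper names here
are hypothetical, not a real dpdk or QEMU API):

    send_backend_request(backend_fd, &req);   /* e.g. HOST_NOTIFIER_MSG */
    for (;;) {
        poll_both(master_fd, backend_fd);     /* never block on one fd  */
        if (readable(master_fd)) {
            recv_msg(master_fd, &m);
            handle_master_msg(&m);            /* e.g. SET_MEM_TABLE     */
        }
        if (readable(backend_fd)) {
            recv_msg(backend_fd, &m);
            if (is_reply_to(&m, &req)) {
                break;                        /* got our ack            */
            }
        }
    }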


> [...]
Re: [PATCH] vhost-user: use new thread to loop backend channel.
Posted by Rma Ma 10 months, 3 weeks ago
When the VM is first created, vhost_dev_start runs in
kvm_vcpu_thread_fn while the backend channel is serviced in the main
thread, so there seems to be no multithreaded synchronization problem.

After live migration, vhost_dev_start runs in os_host_main_loop_wait
and the backend channel is also serviced in the main thread, so there
is a risk of deadlock.

That is why I think servicing the backend channel in a separate thread
can fix the deadlock.

Does a multithreaded synchronization problem really exist when the
backend channel runs in a separate thread? (A standalone sketch of the
mechanism follows.)
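
For reference, a minimal standalone GLib sketch of the mechanism the
patch relies on (an assumed toy program, not QEMU code): a GSource
attached to a private GMainContext is dispatched by whichever thread
iterates that context, so the default main loop can block without
stalling it.

    #include <glib.h>

    static gboolean on_ready(gpointer data)
    {
        g_print("dispatched on the worker thread\n");
        return G_SOURCE_REMOVE;              /* fire once, then detach */
    }

    static gpointer worker(gpointer data)
    {
        g_main_loop_run(data);               /* iterates only ctx      */
        return NULL;
    }

    int main(void)
    {
        GMainContext *ctx = g_main_context_new();
        GMainLoop *loop = g_main_loop_new(ctx, FALSE);
        GSource *src = g_idle_source_new();

        g_source_set_callback(src, on_ready, NULL, NULL);
        g_source_attach(src, ctx);           /* not the default context */
        g_source_unref(src);

        GThread *th = g_thread_new("worker", worker, loop);
        g_usleep(G_USEC_PER_SEC / 10);       /* let the source fire     */
        g_main_loop_quit(loop);
        g_thread_join(th);
        g_main_loop_unref(loop);
        g_main_context_unref(ctx);
        return 0;
    }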

________________________________
From: Michael S. Tsirkin <mst@redhat.com>
Sent: June 29, 2023 15:13
To: Rma Ma <rma.ma@jaguarmicro.com>
Cc: qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [PATCH] vhost-user: use new thread to loop backend channel.

On Thu, Jun 29, 2023 at 02:54:15PM +0800, Rma Ma wrote:
> fix: QEMU deadlock with dpdk-vdpa
>
> [...]
>
> Signed-off-by: Rma Ma <rma.ma@jaguarmicro.com>

Hmm, this will need some thought. I'm concerned that this is making
what was previously simple synchronous code multithreaded.

Also, I feel dpdk needs to fix this too.

Let's document in the spec that when the backend is allowed to send
messages (VHOST_USER_PROTOCOL_F_BACKEND_REQ), neither side is allowed
to block processing incoming messages while waiting for a response.


> [...]