[PATCH V2] virtio/vhost-user: fix qemu abort when hotunplug vhost-user-net device

yaozhenguo posted 1 patch 2 weeks, 3 days ago
hw/virtio/vhost-user.c         | 39 +++++++++++++++++++++------------------
include/hw/virtio/vhost-user.h |  1 +
2 files changed, 22 insertions(+), 18 deletions(-)
[PATCH V2] virtio/vhost-user: fix qemu abort when hotunplug vhost-user-net device
Posted by yaozhenguo 2 weeks, 3 days ago
During the process of hot-unplug in vhost-user-net NIC, vhost_user_cleanup
may add same rcu node to rcu list. Function calls are as follows:

vhost_user_cleanup
    ->vhost_user_host_notifier_remove
        ->call_rcu(n, vhost_user_host_notifier_free, rcu);
    ->g_free_rcu(n, rcu);

When this happens, QEMU will abort in try_dequeue:

if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
    abort();
}

Backtrace is as follows:
0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
1  raise () at /usr/lib64/libc.so.6
2  abort () at /usr/lib64/libc.so.6
3  try_dequeue () at ../util/rcu.c:235
4  call_rcu_thread (0) at ../util/rcu.c:288
5  qemu_thread_start (0) at ../util/qemu-thread-posix.c:541
6  start_thread () at /usr/lib64/libc.so.6
7  clone3 () at /usr/lib64/libc.so.6

Reason for the abort is that adding two identical nodes to the rcu list will
cause it becomes a ring. After dummy node is added to the tail of the list in
try_dequeue, the ring is opened. But lead to a situation that only one node is
added to list and rcu_call_count is added twice. This will cause try_dequeue
abort.

This issue happens when n->addr != 0 in vhost_user_host_notifier_remove. It can
happens in a hotplug stress test with a 32queue vhost-user-net type NIC.
Because n->addr is set in VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG function.
during device hotplug process and it is cleared in vhost_dev_stop during device
hot-unplug. Since VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is a message sent
by DPDK to qemu, it is asynchronous. So, there is no guaranteed order between
the two processes of setting n->addr and clearing n->addr. If setting n->addr
in hotplug occurs after clearing n->addr in hotunplug, the issue will occur.
So, it is necessary to merge g_free_rcu and vhost_user_host_notifier_free into
one rcu node.

Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")

Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
---

V1->V2:
    add n->addr check in vhost_user_get_vring_base and vhost_user_backend_handle_vring_host_notifier
    to prevent submit same node to rcu list.
   
---
 hw/virtio/vhost-user.c         | 39 +++++++++++++++++++++------------------
 include/hw/virtio/vhost-user.h |  1 +
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 00561da..ba4c09c 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -51,7 +51,6 @@
 #else
 #define VHOST_USER_MAX_RAM_SLOTS 512
 #endif
-
 /*
  * Maximum size of virtio device config space
  */
@@ -1185,9 +1184,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev,
 
 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
 {
-    assert(n && n->unmap_addr);
-    munmap(n->unmap_addr, qemu_real_host_page_size());
-    n->unmap_addr = NULL;
+    if (n->unmap_addr) {
+        munmap(n->unmap_addr, qemu_real_host_page_size());
+        n->unmap_addr = NULL;
+    }
+    if (n->need_free) {
+        memory_region_transaction_begin();
+        object_unparent(OBJECT(&n->mr));
+        memory_region_transaction_commit();
+        g_free(n);
+    }
 }
 
 /*
@@ -1195,17 +1201,20 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
  * under rcu.
  */
 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
-                                            VirtIODevice *vdev)
+                                            VirtIODevice *vdev, bool free)
 {
     if (n->addr) {
         if (vdev) {
+            memory_region_transaction_begin();
             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
+            memory_region_transaction_commit();
         }
         assert(!n->unmap_addr);
         n->unmap_addr = n->addr;
         n->addr = NULL;
-        call_rcu(n, vhost_user_host_notifier_free, rcu);
     }
+    n->need_free = free;
+    call_rcu(n, vhost_user_host_notifier_free, rcu);
 }
 
 static int vhost_user_set_vring_base(struct vhost_dev *dev,
@@ -1279,8 +1288,8 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
     struct vhost_user *u = dev->opaque;
 
     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
-    if (n) {
-        vhost_user_host_notifier_remove(n, dev->vdev);
+    if (n && n->addr) {
+        vhost_user_host_notifier_remove(n, dev->vdev, false);
     }
 
     ret = vhost_user_write(dev, &msg, NULL, 0);
@@ -1562,7 +1571,9 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
      * new mapped address.
      */
     n = fetch_or_create_notifier(user, queue_idx);
-    vhost_user_host_notifier_remove(n, vdev);
+    if (n && n->addr) {
+        vhost_user_host_notifier_remove(n, vdev, false);
+    }
 
     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
         return 0;
@@ -2737,13 +2748,7 @@ static void vhost_user_state_destroy(gpointer data)
 {
     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
     if (n) {
-        vhost_user_host_notifier_remove(n, NULL);
-        object_unparent(OBJECT(&n->mr));
-        /*
-         * We can't free until vhost_user_host_notifier_remove has
-         * done it's thing so schedule the free with RCU.
-         */
-        g_free_rcu(n, rcu);
+        vhost_user_host_notifier_remove(n, NULL, true);
     }
 }
 
@@ -2765,9 +2770,7 @@ void vhost_user_cleanup(VhostUserState *user)
     if (!user->chr) {
         return;
     }
-    memory_region_transaction_begin();
     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
-    memory_region_transaction_commit();
     user->chr = NULL;
 }
 
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 324cd86..a171f29 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
     void *addr;
     void *unmap_addr;
     int idx;
+    bool need_free;
 } VhostUserHostNotifier;
 
 /**
-- 
1.8.3.1
Re: [PATCH V2] virtio/vhost-user: fix qemu abort when hotunplug vhost-user-net device
Posted by Stefano Garzarella 4 days, 23 hours ago
On Thu, Sep 19, 2024 at 03:29:44PM GMT, yaozhenguo wrote:
>During the process of hot-unplug in vhost-user-net NIC, vhost_user_cleanup
>may add same rcu node to rcu list. Function calls are as follows:
>
>vhost_user_cleanup
>    ->vhost_user_host_notifier_remove
>        ->call_rcu(n, vhost_user_host_notifier_free, rcu);
>    ->g_free_rcu(n, rcu);
>
>When this happens, QEMU will abort in try_dequeue:
>
>if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
>    abort();
>}
>
>Backtrace is as follows:
>0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
>1  raise () at /usr/lib64/libc.so.6
>2  abort () at /usr/lib64/libc.so.6
>3  try_dequeue () at ../util/rcu.c:235
>4  call_rcu_thread (0) at ../util/rcu.c:288
>5  qemu_thread_start (0) at ../util/qemu-thread-posix.c:541
>6  start_thread () at /usr/lib64/libc.so.6
>7  clone3 () at /usr/lib64/libc.so.6
>
>Reason for the abort is that adding two identical nodes to the rcu list will
>cause it becomes a ring. After dummy node is added to the tail of the list in
>try_dequeue, the ring is opened. But lead to a situation that only one node is
>added to list and rcu_call_count is added twice. This will cause try_dequeue
>abort.
>
>This issue happens when n->addr != 0 in vhost_user_host_notifier_remove. It can
>happens in a hotplug stress test with a 32queue vhost-user-net type NIC.
>Because n->addr is set in VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG function.
>during device hotplug process and it is cleared in vhost_dev_stop during device
>hot-unplug. Since VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is a message sent
>by DPDK to qemu, it is asynchronous. So, there is no guaranteed order between
>the two processes of setting n->addr and clearing n->addr. If setting n->addr
>in hotplug occurs after clearing n->addr in hotunplug, the issue will occur.
>So, it is necessary to merge g_free_rcu and vhost_user_host_notifier_free into
>one rcu node.
>
>Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
>
>Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
>---
>
>V1->V2:
>    add n->addr check in vhost_user_get_vring_base and vhost_user_backend_handle_vring_host_notifier
>    to prevent submit same node to rcu list.
>
>---
> hw/virtio/vhost-user.c         | 39 +++++++++++++++++++++------------------
> include/hw/virtio/vhost-user.h |  1 +
> 2 files changed, 22 insertions(+), 18 deletions(-)
>
>diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>index 00561da..ba4c09c 100644
>--- a/hw/virtio/vhost-user.c
>+++ b/hw/virtio/vhost-user.c
>@@ -51,7 +51,6 @@
> #else
> #define VHOST_USER_MAX_RAM_SLOTS 512
> #endif
>-
> /*
>  * Maximum size of virtio device config space
>  */
>@@ -1185,9 +1184,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev,
>
> static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> {
>-    assert(n && n->unmap_addr);
>-    munmap(n->unmap_addr, qemu_real_host_page_size());
>-    n->unmap_addr = NULL;
>+    if (n->unmap_addr) {
>+        munmap(n->unmap_addr, qemu_real_host_page_size());
>+        n->unmap_addr = NULL;
>+    }
>+    if (n->need_free) {
>+        memory_region_transaction_begin();
>+        object_unparent(OBJECT(&n->mr));
>+        memory_region_transaction_commit();
>+        g_free(n);
>+    }
> }
>
> /*
>@@ -1195,17 +1201,20 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>  * under rcu.
>  */
> static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>-                                            VirtIODevice *vdev)
>+                                            VirtIODevice *vdev, bool free)

What about `destroy` instead of `free`?

In that way is more clear that it should be true when called by
`vhost_user_state_destroy()`.

> {
>     if (n->addr) {
>         if (vdev) {
>+            memory_region_transaction_begin();
>             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
>+            memory_region_transaction_commit();
>         }
>         assert(!n->unmap_addr);
>         n->unmap_addr = n->addr;
>         n->addr = NULL;
>-        call_rcu(n, vhost_user_host_notifier_free, rcu);
>     }

Instead of checking n->addr in the caller, I suggest to move the check
here:

       if (destroy || n->unmap_addr) {
           s->destroy = destroy;
           call_rcu(n, vhost_user_host_notifier_free, rcu);
       }

Thanks,
Stefano

>+    n->need_free = free;
>+    call_rcu(n, vhost_user_host_notifier_free, rcu);
> }
>
> static int vhost_user_set_vring_base(struct vhost_dev *dev,
>@@ -1279,8 +1288,8 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
>     struct vhost_user *u = dev->opaque;
>
>     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
>-    if (n) {
>-        vhost_user_host_notifier_remove(n, dev->vdev);
>+    if (n && n->addr) {
>+        vhost_user_host_notifier_remove(n, dev->vdev, false);
>     }
>
>     ret = vhost_user_write(dev, &msg, NULL, 0);
>@@ -1562,7 +1571,9 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
>      * new mapped address.
>      */
>     n = fetch_or_create_notifier(user, queue_idx);
>-    vhost_user_host_notifier_remove(n, vdev);
>+    if (n && n->addr) {
>+        vhost_user_host_notifier_remove(n, vdev, false);
>+    }
>
>     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
>         return 0;
>@@ -2737,13 +2748,7 @@ static void vhost_user_state_destroy(gpointer data)
> {
>     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
>     if (n) {
>-        vhost_user_host_notifier_remove(n, NULL);
>-        object_unparent(OBJECT(&n->mr));
>-        /*
>-         * We can't free until vhost_user_host_notifier_remove has
>-         * done it's thing so schedule the free with RCU.
>-         */
>-        g_free_rcu(n, rcu);
>+        vhost_user_host_notifier_remove(n, NULL, true);
>     }
> }
>
>@@ -2765,9 +2770,7 @@ void vhost_user_cleanup(VhostUserState *user)
>     if (!user->chr) {
>         return;
>     }
>-    memory_region_transaction_begin();
>     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
>-    memory_region_transaction_commit();
>     user->chr = NULL;
> }
>
>diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
>index 324cd86..a171f29 100644
>--- a/include/hw/virtio/vhost-user.h
>+++ b/include/hw/virtio/vhost-user.h
>@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
>     void *addr;
>     void *unmap_addr;
>     int idx;
>+    bool need_free;
> } VhostUserHostNotifier;
>
> /**
>-- 
>1.8.3.1
>