[PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()

Stefan Hajnoczi posted 1 patch 1 week, 1 day ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20220803162824.948023-1-stefanha@redhat.com
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Fam Zheng <fam@euphon.net>, "Michael S. Tsirkin" <mst@redhat.com>
hw/scsi/virtio-scsi-dataplane.c | 33 +++++++++++++++++++++++++--------
1 file changed, 25 insertions(+), 8 deletions(-)
[PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Stefan Hajnoczi 1 week, 1 day ago
As soon as virtio_scsi_data_plane_start() attaches host notifiers the
IOThread may start virtqueue processing. There is a race between
IOThread virtqueue processing and virtio_scsi_data_plane_start() because
it only assigns s->dataplane_started after attaching host notifiers.

When a virtqueue handler function in the IOThread calls
virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
attempt to start dataplane even though we're already in the IOThread:

  #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
  #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
  #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
  #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
  #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
  #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
  #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
  #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
  #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
  #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
  #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
  #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
  #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
  #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
  #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
  #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
  #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)

This patch assigns s->dataplane_started before attaching host notifiers
so that virtqueue handler functions that run in the IOThread before
virtio_scsi_data_plane_start() returns correctly identify that dataplane
does not need to be started.

Note that s->dataplane_started does not need the AioContext lock because
it is set before attaching host notifiers and cleared after detaching
host notifiers. In other words, the IOThread always sees the value true
and the main loop thread does not modify it while the IOThread is
active.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/scsi/virtio-scsi-dataplane.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 8bb6e6acfc..a575c3f0cd 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -66,6 +66,21 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
     return 0;
 }
 
+/* Context: BH in IOThread */
+static void virtio_scsi_dataplane_start_bh(void *opaque)
+{
+    VirtIOSCSI *s = opaque;
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    int i;
+
+    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
+    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+
+    for (i = 0; i < vs->conf.num_queues; i++) {
+        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+    }
+}
+
 /* Context: BH in IOThread */
 static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
@@ -136,16 +151,18 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
 
     memory_region_transaction_commit();
 
-    aio_context_acquire(s->ctx);
-    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
-    }
-
     s->dataplane_starting = false;
     s->dataplane_started = true;
+
+    /*
+     * Attach notifiers from within the IOThread. It's possible to attach
+     * notifiers from our thread directly but this approach has the advantages
+     * that virtio_scsi_dataplane_start_bh() is symmetric with
+     * virtio_scsi_dataplane_stop_bh() and the s->dataplane_started assignment
+     * above doesn't require explicit synchronization.
+     */
+    aio_context_acquire(s->ctx);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_start_bh, s);
     aio_context_release(s->ctx);
     return 0;
 
-- 
2.37.1
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Paolo Bonzini 6 days, 15 hours ago
On 8/3/22 18:28, Stefan Hajnoczi wrote:
> diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
> index 8bb6e6acfc..a575c3f0cd 100644
> --- a/hw/scsi/virtio-scsi-dataplane.c
> +++ b/hw/scsi/virtio-scsi-dataplane.c
> @@ -66,6 +66,21 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
>       return 0;
>   }
>   
> +/* Context: BH in IOThread */
> +static void virtio_scsi_dataplane_start_bh(void *opaque)
> +{
> +    VirtIOSCSI *s = opaque;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
> +    int i;
> +
> +    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> +    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> +
> +    for (i = 0; i < vs->conf.num_queues; i++) {
> +        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> +    }
> +}
> +
>   /* Context: BH in IOThread */
>   static void virtio_scsi_dataplane_stop_bh(void *opaque)
>   {
> @@ -136,16 +151,18 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
>   
>       memory_region_transaction_commit();
>   
> -    aio_context_acquire(s->ctx);
> -    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> -    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> -
> -    for (i = 0; i < vs->conf.num_queues; i++) {
> -        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> -    }
> -
>       s->dataplane_starting = false;
>       s->dataplane_started = true;
> +
> +    /*
> +     * Attach notifiers from within the IOThread. It's possible to attach
> +     * notifiers from our thread directly but this approach has the advantages
> +     * that virtio_scsi_dataplane_start_bh() is symmetric with
> +     * virtio_scsi_dataplane_stop_bh() and the s->dataplane_started assignment
> +     * above doesn't require explicit synchronization.
> +     */
> +    aio_context_acquire(s->ctx);
> +    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_start_bh, s);
>       aio_context_release(s->ctx);

Taking the AioContext lock for code that is running in another thread 
seems wrong.  But really there is no need to take the lock: I think it 
wanted to protect against the handler running before 
s->dataplane_starting/started were set, but it's not needed now because 
the iothread is busy running the bottom half.

Also, please do the same in virtio-blk as well.

Thanks,

Paolo
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Stefan Hajnoczi 6 days, 14 hours ago
On Fri, Aug 5, 2022, 05:59 Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 8/3/22 18:28, Stefan Hajnoczi wrote:
> > diff --git a/hw/scsi/virtio-scsi-dataplane.c
> b/hw/scsi/virtio-scsi-dataplane.c
> > index 8bb6e6acfc..a575c3f0cd 100644
> > --- a/hw/scsi/virtio-scsi-dataplane.c
> > +++ b/hw/scsi/virtio-scsi-dataplane.c
> > @@ -66,6 +66,21 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI
> *s, VirtQueue *vq, int n)
> >       return 0;
> >   }
> >
> > +/* Context: BH in IOThread */
> > +static void virtio_scsi_dataplane_start_bh(void *opaque)
> > +{
> > +    VirtIOSCSI *s = opaque;
> > +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
> > +    int i;
> > +
> > +    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> > +    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> > +
> > +    for (i = 0; i < vs->conf.num_queues; i++) {
> > +        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> > +    }
> > +}
> > +
> >   /* Context: BH in IOThread */
> >   static void virtio_scsi_dataplane_stop_bh(void *opaque)
> >   {
> > @@ -136,16 +151,18 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
> >
> >       memory_region_transaction_commit();
> >
> > -    aio_context_acquire(s->ctx);
> > -    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> > -    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> > -
> > -    for (i = 0; i < vs->conf.num_queues; i++) {
> > -        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> > -    }
> > -
> >       s->dataplane_starting = false;
> >       s->dataplane_started = true;
> > +
> > +    /*
> > +     * Attach notifiers from within the IOThread. It's possible to
> attach
> > +     * notifiers from our thread directly but this approach has the
> advantages
> > +     * that virtio_scsi_dataplane_start_bh() is symmetric with
> > +     * virtio_scsi_dataplane_stop_bh() and the s->dataplane_started
> assignment
> > +     * above doesn't require explicit synchronization.
> > +     */
> > +    aio_context_acquire(s->ctx);
> > +    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_start_bh, s);
> >       aio_context_release(s->ctx);
>
> Taking the AioContext lock for code that is running in another thread
> seems wrong.  But really there is no need to take the lock: I think it
> wanted to protect against the handler running before
> s->dataplane_starting/started were set, but it's not needed now because
> the iothread is busy running the bottom half.
>

The lock must be taken solely because aio_wait_bh_oneshot() requires that
the caller holds it. Emanuele is working on removing the lock from
AIO_WAIT_WHILE() but for now it's necessary to hold the lock.


> Also, please do the same in virtio-blk as well.
>

Okay. There are a few other differences between virtio-scsi and virtio-blk
dataplane code that I'll investigate. Maybe they can share common
start/stop functions.

Stefan

>
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Emanuele Giuseppe Esposito 6 days, 16 hours ago

Am 03/08/2022 um 18:28 schrieb Stefan Hajnoczi:
> As soon as virtio_scsi_data_plane_start() attaches host notifiers the
> IOThread may start virtqueue processing. There is a race between
> IOThread virtqueue processing and virtio_scsi_data_plane_start() because
> it only assigns s->dataplane_started after attaching host notifiers.
> 
> When a virtqueue handler function in the IOThread calls
> virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
> attempt to start dataplane even though we're already in the IOThread:
> 
>   #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
>   #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
>   #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
>   #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
>   #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
>   #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
>   #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
>   #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
>   #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
>   #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
>   #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
>   #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
>   #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
>   #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
>   #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
>   #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
>   #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)
> 
> This patch assigns s->dataplane_started before attaching host notifiers
> so that virtqueue handler functions that run in the IOThread before
> virtio_scsi_data_plane_start() returns correctly identify that dataplane
> does not need to be started.
> 
> Note that s->dataplane_started does not need the AioContext lock because
> it is set before attaching host notifiers and cleared after detaching
> host notifiers. In other words, the IOThread always sees the value true
> and the main loop thread does not modify it while the IOThread is
> active.
> 
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
> Reported-by: Qing Wang <qinwang@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/scsi/virtio-scsi-dataplane.c | 33 +++++++++++++++++++++++++--------
>  1 file changed, 25 insertions(+), 8 deletions(-)
> 
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Michael S. Tsirkin 6 days, 17 hours ago
On Wed, Aug 03, 2022 at 12:28:24PM -0400, Stefan Hajnoczi wrote:
> As soon as virtio_scsi_data_plane_start() attaches host notifiers the
> IOThread may start virtqueue processing. There is a race between
> IOThread virtqueue processing and virtio_scsi_data_plane_start() because
> it only assigns s->dataplane_started after attaching host notifiers.
> 
> When a virtqueue handler function in the IOThread calls
> virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
> attempt to start dataplane even though we're already in the IOThread:
> 
>   #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
>   #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
>   #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
>   #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
>   #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
>   #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
>   #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
>   #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
>   #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
>   #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
>   #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
>   #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
>   #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
>   #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
>   #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
>   #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
>   #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)
> 
> This patch assigns s->dataplane_started before attaching host notifiers
> so that virtqueue handler functions that run in the IOThread before
> virtio_scsi_data_plane_start() returns correctly identify that dataplane
> does not need to be started.
> 
> Note that s->dataplane_started does not need the AioContext lock because
> it is set before attaching host notifiers and cleared after detaching
> host notifiers. In other words, the IOThread always sees the value true
> and the main loop thread does not modify it while the IOThread is
> active.
> 
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
> Reported-by: Qing Wang <qinwang@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

A scsi thing that tree seems more appropriate.

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>



> ---
>  hw/scsi/virtio-scsi-dataplane.c | 33 +++++++++++++++++++++++++--------
>  1 file changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
> index 8bb6e6acfc..a575c3f0cd 100644
> --- a/hw/scsi/virtio-scsi-dataplane.c
> +++ b/hw/scsi/virtio-scsi-dataplane.c
> @@ -66,6 +66,21 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
>      return 0;
>  }
>  
> +/* Context: BH in IOThread */
> +static void virtio_scsi_dataplane_start_bh(void *opaque)
> +{
> +    VirtIOSCSI *s = opaque;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
> +    int i;
> +
> +    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> +    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> +
> +    for (i = 0; i < vs->conf.num_queues; i++) {
> +        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> +    }
> +}
> +
>  /* Context: BH in IOThread */
>  static void virtio_scsi_dataplane_stop_bh(void *opaque)
>  {
> @@ -136,16 +151,18 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
>  
>      memory_region_transaction_commit();
>  
> -    aio_context_acquire(s->ctx);
> -    virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> -    virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
> -
> -    for (i = 0; i < vs->conf.num_queues; i++) {
> -        virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> -    }
> -
>      s->dataplane_starting = false;
>      s->dataplane_started = true;
> +
> +    /*
> +     * Attach notifiers from within the IOThread. It's possible to attach
> +     * notifiers from our thread directly but this approach has the advantages
> +     * that virtio_scsi_dataplane_start_bh() is symmetric with
> +     * virtio_scsi_dataplane_stop_bh() and the s->dataplane_started assignment
> +     * above doesn't require explicit synchronization.
> +     */
> +    aio_context_acquire(s->ctx);
> +    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_start_bh, s);
>      aio_context_release(s->ctx);
>      return 0;
>  
> -- 
> 2.37.1
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Paolo Bonzini 6 days, 15 hours ago
On 8/5/22 09:04, Michael S. Tsirkin wrote:
>>
>> Buglink:https://bugzilla.redhat.com/show_bug.cgi?id=2099541
>> Reported-by: Qing Wang<qinwang@redhat.com>
>> Signed-off-by: Stefan Hajnoczi<stefanha@redhat.com>
> A scsi thing that tree seems more appropriate.
> 
> Reviewed-by: Michael S. Tsirkin<mst@redhat.com>
> 
> 
> 

Since the same thing has to be done in virtio-blk, any of the 
block/misc/virtio tree will do.

Paolo
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Stefan Hajnoczi 3 days, 8 hours ago
On Fri, Aug 05, 2022 at 11:41:27AM +0200, Paolo Bonzini wrote:
> On 8/5/22 09:04, Michael S. Tsirkin wrote:
> > > 
> > > Buglink:https://bugzilla.redhat.com/show_bug.cgi?id=2099541
> > > Reported-by: Qing Wang<qinwang@redhat.com>
> > > Signed-off-by: Stefan Hajnoczi<stefanha@redhat.com>
> > A scsi thing that tree seems more appropriate.
> > 
> > Reviewed-by: Michael S. Tsirkin<mst@redhat.com>
> > 
> > 
> > 
> 
> Since the same thing has to be done in virtio-blk, any of the
> block/misc/virtio tree will do.

I don't think virtio-blk changes are required because it's already safe.

On the store side:

  s->starting = false;
  vblk->dataplane_started = true;
  trace_virtio_blk_data_plane_start(s);

  ...

  /* Get this show started by hooking up our callbacks */
  aio_context_acquire(s->ctx);
  ^^^ implicit memory barrier ^^^
  for (i = 0; i < nvqs; i++) {
      VirtQueue *vq = virtio_get_queue(s->vdev, i);

      virtio_queue_aio_attach_host_notifier(vq, s->ctx);
  }
  aio_context_release(s->ctx);

On the load side:

  void aio_notify_accept(AioContext *ctx)
  {
      qatomic_set(&ctx->notified, false);

      /*
       * Write ctx->notified before reading e.g. bh->flags.  Pairs with smp_wmb
       * in aio_notify.
       */
      smp_mb();
      ^^^^^^^^^
  }

vblk->dataplane_started will be visible to the IOThread.

Stefan
Re: [PATCH 7.1] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Stefan Hajnoczi 1 week ago
On Wed, Aug 03, 2022 at 12:28:24PM -0400, Stefan Hajnoczi wrote:
> As soon as virtio_scsi_data_plane_start() attaches host notifiers the
> IOThread may start virtqueue processing. There is a race between
> IOThread virtqueue processing and virtio_scsi_data_plane_start() because
> it only assigns s->dataplane_started after attaching host notifiers.
> 
> When a virtqueue handler function in the IOThread calls
> virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
> attempt to start dataplane even though we're already in the IOThread:
> 
>   #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
>   #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
>   #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
>   #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
>   #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
>   #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
>   #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
>   #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
>   #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
>   #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
>   #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
>   #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
>   #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
>   #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
>   #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
>   #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
>   #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)
> 
> This patch assigns s->dataplane_started before attaching host notifiers
> so that virtqueue handler functions that run in the IOThread before
> virtio_scsi_data_plane_start() returns correctly identify that dataplane
> does not need to be started.
> 
> Note that s->dataplane_started does not need the AioContext lock because
> it is set before attaching host notifiers and cleared after detaching
> host notifiers. In other words, the IOThread always sees the value true
> and the main loop thread does not modify it while the IOThread is
> active.
> 
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
> Reported-by: Qing Wang <qinwang@redhat.com>

Qing Wang has confirmed that this solves the assertion failures.

Paolo/Michael: Can this still be merged for QEMU 7.1?

Stefan
Re: [PATCH 7.1] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Michael S. Tsirkin 6 days, 17 hours ago
On Thu, Aug 04, 2022 at 02:56:59PM -0400, Stefan Hajnoczi wrote:
> On Wed, Aug 03, 2022 at 12:28:24PM -0400, Stefan Hajnoczi wrote:
> > As soon as virtio_scsi_data_plane_start() attaches host notifiers the
> > IOThread may start virtqueue processing. There is a race between
> > IOThread virtqueue processing and virtio_scsi_data_plane_start() because
> > it only assigns s->dataplane_started after attaching host notifiers.
> > 
> > When a virtqueue handler function in the IOThread calls
> > virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
> > attempt to start dataplane even though we're already in the IOThread:
> > 
> >   #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
> >   #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
> >   #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
> >   #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
> >   #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
> >   #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
> >   #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
> >   #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
> >   #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
> >   #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
> >   #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
> >   #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
> >   #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
> >   #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
> >   #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
> >   #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
> >   #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)
> > 
> > This patch assigns s->dataplane_started before attaching host notifiers
> > so that virtqueue handler functions that run in the IOThread before
> > virtio_scsi_data_plane_start() returns correctly identify that dataplane
> > does not need to be started.
> > 
> > Note that s->dataplane_started does not need the AioContext lock because
> > it is set before attaching host notifiers and cleared after detaching
> > host notifiers. In other words, the IOThread always sees the value true
> > and the main loop thread does not modify it while the IOThread is
> > active.
> > 
> > Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
> > Reported-by: Qing Wang <qinwang@redhat.com>
> 
> Qing Wang has confirmed that this solves the assertion failures.
> 
> Paolo/Michael: Can this still be merged for QEMU 7.1?
> 
> Stefan

As a bugfix, for sure.

Can I have your ack?

-- 
MST
Re: [PATCH] virtio-scsi: fix race in virtio_scsi_dataplane_start()
Posted by Stefano Garzarella 1 week ago
On Wed, Aug 03, 2022 at 12:28:24PM -0400, Stefan Hajnoczi wrote:
>As soon as virtio_scsi_data_plane_start() attaches host notifiers the
>IOThread may start virtqueue processing. There is a race between
>IOThread virtqueue processing and virtio_scsi_data_plane_start() because
>it only assigns s->dataplane_started after attaching host notifiers.
>
>When a virtqueue handler function in the IOThread calls
>virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and
>attempt to start dataplane even though we're already in the IOThread:
>
>  #0  0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c)
>  #1  0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56)
>  #2  0x00007f67b358e833 abort (libc.so.6 + 0x28833)
>  #3  0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b)
>  #4  0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6)
>  #5  0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b)
>  #6  0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811)
>  #7  0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836)
>  #8  0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e)
>  #9  0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62)
>  #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610)
>  #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a)
>  #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902)
>  #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e)
>  #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761)
>  #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052)
>  #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a)
>
>This patch assigns s->dataplane_started before attaching host notifiers
>so that virtqueue handler functions that run in the IOThread before
>virtio_scsi_data_plane_start() returns correctly identify that dataplane
>does not need to be started.
>
>Note that s->dataplane_started does not need the AioContext lock because
>it is set before attaching host notifiers and cleared after detaching
>host notifiers. In other words, the IOThread always sees the value true
>and the main loop thread does not modify it while the IOThread is
>active.
>
>Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541
>Reported-by: Qing Wang <qinwang@redhat.com>
>Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>---
> hw/scsi/virtio-scsi-dataplane.c | 33 +++++++++++++++++++++++++--------
> 1 file changed, 25 insertions(+), 8 deletions(-)

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>