[PATCH v4 17/20] virtio-blk: implement BlockDevOps->drained_begin()

Posted by Stefan Hajnoczi 2 years, 9 months ago
Detach ioeventfds during drained sections to stop I/O submission from
the guest. virtio-blk is no longer reliant on aio_disable_external()
after this patch. This will allow us to remove the
aio_disable_external() API once all other code that relies on it is
converted.

Take extra care to avoid attaching/detaching ioeventfds if the data
plane is started/stopped during a drained section. This should be rare,
but maybe the mirror block job can trigger it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 17 +++++++++------
 hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index bd7cc6e76b..d77fc6028c 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     }
 
     /* Get this show started by hooking up our callbacks */
-    aio_context_acquire(s->ctx);
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_context_acquire(s->ctx);
+        for (i = 0; i < nvqs; i++) {
+            VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        }
+        aio_context_release(s->ctx);
     }
-    aio_context_release(s->ctx);
     return 0;
 
   fail_aio_context:
@@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
-    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+
+    if (!blk_in_drain(s->conf->conf.blk)) {
+        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+    }
 
     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
     blk_drain(s->conf->conf.blk);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cefca93b31..d8dedc575c 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1109,8 +1109,44 @@ static void virtio_blk_resize(void *opaque)
     aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
 }
 
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_blk_drained_begin(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_detach_host_notifier(vq, ctx);
+    }
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_blk_drained_end(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
+
+    if (!s->dataplane || !s->dataplane_started) {
+        return;
+    }
+
+    for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(vdev, i);
+        virtio_queue_aio_attach_host_notifier(vq, ctx);
+    }
+}
+
 static const BlockDevOps virtio_block_ops = {
-    .resize_cb = virtio_blk_resize,
+    .resize_cb     = virtio_blk_resize,
+    .drained_begin = virtio_blk_drained_begin,
+    .drained_end   = virtio_blk_drained_end,
 };
 
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
-- 
2.39.2
Re: [PATCH v4 17/20] virtio-blk: implement BlockDevOps->drained_begin()
Posted by Kevin Wolf 2 years, 9 months ago
Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> Detach ioeventfds during drained sections to stop I/O submission from
> the guest. virtio-blk is no longer reliant on aio_disable_external()
> after this patch. This will allow us to remove the
> aio_disable_external() API once all other code that relies on it is
> converted.
> 
> Take extra care to avoid attaching/detaching ioeventfds if the data
> plane is started/stopped during a drained section. This should be rare,
> but maybe the mirror block job can trigger it.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
>  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
>  2 files changed, 48 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index bd7cc6e76b..d77fc6028c 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
>      }
>  
>      /* Get this show started by hooking up our callbacks */
> -    aio_context_acquire(s->ctx);
> -    for (i = 0; i < nvqs; i++) {
> -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> +    if (!blk_in_drain(s->conf->conf.blk)) {
> +        aio_context_acquire(s->ctx);
> +        for (i = 0; i < nvqs; i++) {
> +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
>  
> -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> +        }
> +        aio_context_release(s->ctx);
>      }
> -    aio_context_release(s->ctx);
>      return 0;
>  
>    fail_aio_context:
> @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
>      trace_virtio_blk_data_plane_stop(s);
>  
>      aio_context_acquire(s->ctx);
> -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> +
> +    if (!blk_in_drain(s->conf->conf.blk)) {
> +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> +    }

So here we actually get a semantic change: What you described as the
second part in the previous patch, processing the virtqueue one last
time, isn't done any more if the device is drained.

If it's okay to just skip this during drain, why do we need to do it
outside of drain?

Kevin
Re: [PATCH v4 17/20] virtio-blk: implement BlockDevOps->drained_begin()
Posted by Stefan Hajnoczi 2 years, 9 months ago
On Thu, May 04, 2023 at 11:13:42PM +0200, Kevin Wolf wrote:
> Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> > Detach ioeventfds during drained sections to stop I/O submission from
> > the guest. virtio-blk is no longer reliant on aio_disable_external()
> > after this patch. This will allow us to remove the
> > aio_disable_external() API once all other code that relies on it is
> > converted.
> > 
> > Take extra care to avoid attaching/detaching ioeventfds if the data
> > plane is started/stopped during a drained section. This should be rare,
> > but maybe the mirror block job can trigger it.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
> >  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> > index bd7cc6e76b..d77fc6028c 100644
> > --- a/hw/block/dataplane/virtio-blk.c
> > +++ b/hw/block/dataplane/virtio-blk.c
> > @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
> >      }
> >  
> >      /* Get this show started by hooking up our callbacks */
> > -    aio_context_acquire(s->ctx);
> > -    for (i = 0; i < nvqs; i++) {
> > -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_context_acquire(s->ctx);
> > +        for (i = 0; i < nvqs; i++) {
> > +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
> >  
> > -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +        }
> > +        aio_context_release(s->ctx);
> >      }
> > -    aio_context_release(s->ctx);
> >      return 0;
> >  
> >    fail_aio_context:
> > @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
> >      trace_virtio_blk_data_plane_stop(s);
> >  
> >      aio_context_acquire(s->ctx);
> > -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +    }
> 
> So here we actually get a semantic change: What you described as the
> second part in the previous patch, processing the virtqueue one last
> time, isn't done any more if the device is drained.
> 
> If it's okay to just skip this during drain, why do we need to do it
> outside of drain?

I forgot to answer why we need to process requests one last time outside
drain.

This approach comes from how vhost uses ioeventfd. When switching from
vhost back to QEMU emulation, there's a chance that a final virtqueue
kick snuck in while ioeventfd was being disabled.

This is not the case with dataplane nowadays (it may have been in the
past). The only dataplane state transitions are device reset and
'stop'/'cont'. Neither of these requires QEMU to process new requests
while stopping dataplane.

My confidence is not 100%, but it's still pretty high that the
virtio_queue_host_notifier_read() call could be dropped from the
dataplane code. Since I'm not 100% sure, I didn't attempt that.
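
For reference, the pattern under discussion looks roughly like this
(an illustrative sketch only; the helper name is made up, and the real
calls are split between virtio_blk_data_plane_stop_bh() and
virtio_bus_cleanup_host_notifier()):

    /*
     * Sketch of the "process the virtqueue one last time" handoff:
     * detach the ioeventfd handler, then consume any kick that snuck
     * in while the notifier was being disabled.
     */
    static void detach_and_flush_final_kick(VirtQueue *vq, AioContext *ctx)
    {
        /* Stop handling guest kicks in this AioContext */
        virtio_queue_aio_detach_host_notifier(vq, ctx);

        /*
         * If the guest kicked between the last poll and the detach
         * above, this tests and clears the notifier and runs the
         * virtqueue handler one final time.
         */
        virtio_queue_host_notifier_read(virtio_queue_get_host_notifier(vq));
    }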

Stefan
Re: [PATCH v4 17/20] virtio-blk: implement BlockDevOps->drained_begin()
Posted by Stefan Hajnoczi 2 years, 9 months ago
On Thu, May 04, 2023 at 11:13:42PM +0200, Kevin Wolf wrote:
> Am 25.04.2023 um 19:27 hat Stefan Hajnoczi geschrieben:
> > Detach ioeventfds during drained sections to stop I/O submission from
> > the guest. virtio-blk is no longer reliant on aio_disable_external()
> > after this patch. This will allow us to remove the
> > aio_disable_external() API once all other code that relies on it is
> > converted.
> > 
> > Take extra care to avoid attaching/detaching ioeventfds if the data
> > plane is started/stopped during a drained section. This should be rare,
> > but maybe the mirror block job can trigger it.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/block/dataplane/virtio-blk.c | 17 +++++++++------
> >  hw/block/virtio-blk.c           | 38 ++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> > index bd7cc6e76b..d77fc6028c 100644
> > --- a/hw/block/dataplane/virtio-blk.c
> > +++ b/hw/block/dataplane/virtio-blk.c
> > @@ -245,13 +245,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
> >      }
> >  
> >      /* Get this show started by hooking up our callbacks */
> > -    aio_context_acquire(s->ctx);
> > -    for (i = 0; i < nvqs; i++) {
> > -        VirtQueue *vq = virtio_get_queue(s->vdev, i);
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_context_acquire(s->ctx);
> > +        for (i = 0; i < nvqs; i++) {
> > +            VirtQueue *vq = virtio_get_queue(s->vdev, i);
> >  
> > -        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
> > +        }
> > +        aio_context_release(s->ctx);
> >      }
> > -    aio_context_release(s->ctx);
> >      return 0;
> >  
> >    fail_aio_context:
> > @@ -317,7 +319,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
> >      trace_virtio_blk_data_plane_stop(s);
> >  
> >      aio_context_acquire(s->ctx);
> > -    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +
> > +    if (!blk_in_drain(s->conf->conf.blk)) {
> > +        aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
> > +    }
> 
> So here we actually get a semantic change: What you described as the
> second part in the previous patch, processing the virtqueue one last
> time, isn't done any more if the device is drained.
> 
> If it's okay to just skip this during drain, why do we need to do it
> outside of drain?

Yes, it's safe because virtio_blk_data_plane_stop() has two cases:
1. The device is being reset. It is not necessary to process new
   requests.
2. 'stop'/'cont'. 'cont' will call virtio_blk_data_plane_start() ->
   event_notifier_set() so new requests will be processed when the guest
   resumes execution.

That's why I think this is safe and the right thing to do.
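
For case 2 the relevant bit is just the kick on restart; roughly (a
sketch, not the full virtio_blk_data_plane_start()):

    /*
     * Setting the host notifier makes the freshly attached ioeventfd
     * handler fire, so requests the guest queued before 'stop' are
     * picked up once it resumes.
     */
    event_notifier_set(virtio_queue_get_host_notifier(vq));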

However, your question led me to a pre-existing drain bug that occurs
when a vCPU resets the device during a drained section (e.g. when a
mirror block job has started a drained section and the main loop runs
until the block job exits). New requests must not be processed by
virtio_blk_data_plane_stop() because that would violate drain semantics.

It turns out requests are still processed because of
virtio_blk_data_plane_stop() -> virtio_bus_cleanup_host_notifier() ->
virtio_queue_host_notifier_read().
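
That last call boils down to roughly this (a simplified sketch of
virtio_queue_host_notifier_read() from hw/virtio/virtio.c, not the
verbatim code):

    /*
     * If the guest kicked while the notifier was being torn down, the
     * pending notification is consumed here and the virtqueue handler
     * runs (i.e. requests are processed), even inside a drained section.
     */
    if (event_notifier_test_and_clear(host_notifier)) {
        virtio_queue_notify_vq(vq);
    }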

I think that should be handled in a separate patch series. It's not
related to aio_disable_external().

Stefan