virtio_pmem_host_ack() reclaims virtqueue descriptors with
virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
returned token.
If token completion is skipped for any reason, reclaimed descriptors may
not wake a waiter and the submitter may sleep forever waiting for a free
slot.
Always wake one -ENOSPC waiter for each virtqueue completion before
touching the returned token.
Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
wq_buf_avail). They are observed by waiters without pmem_lock, so make
the accesses explicit single loads/stores and avoid compiler
reordering/caching across the wait/wake paths.
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index c3f07be4aa22..6f9890361d0b 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -9,26 +9,33 @@
#include "virtio_pmem.h"
#include "nd.h"
+static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
+{
+ struct virtio_pmem_request *req_buf;
+
+ if (list_empty(&vpmem->req_list))
+ return;
+
+ req_buf = list_first_entry(&vpmem->req_list,
+ struct virtio_pmem_request, list);
+ list_del_init(&req_buf->list);
+ WRITE_ONCE(req_buf->wq_buf_avail, true);
+ wake_up(&req_buf->wq_buf);
+}
+
/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
{
struct virtio_pmem *vpmem = vq->vdev->priv;
- struct virtio_pmem_request *req_data, *req_buf;
+ struct virtio_pmem_request *req_data;
unsigned long flags;
unsigned int len;
spin_lock_irqsave(&vpmem->pmem_lock, flags);
while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
- req_data->done = true;
+ virtio_pmem_wake_one_waiter(vpmem);
+ WRITE_ONCE(req_data->done, true);
wake_up(&req_data->host_acked);
-
- if (!list_empty(&vpmem->req_list)) {
- req_buf = list_first_entry(&vpmem->req_list,
- struct virtio_pmem_request, list);
- req_buf->wq_buf_avail = true;
- wake_up(&req_buf->wq_buf);
- list_del(&req_buf->list);
- }
}
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
}
@@ -58,7 +65,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
if (!req_data)
return -ENOMEM;
- req_data->done = false;
+ WRITE_ONCE(req_data->done, false);
init_waitqueue_head(&req_data->host_acked);
init_waitqueue_head(&req_data->wq_buf);
INIT_LIST_HEAD(&req_data->list);
@@ -79,12 +86,12 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
GFP_ATOMIC)) == -ENOSPC) {
dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
- req_data->wq_buf_avail = false;
+ WRITE_ONCE(req_data->wq_buf_avail, false);
list_add_tail(&req_data->list, &vpmem->req_list);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
/* A host response results in "host_ack" getting called */
- wait_event(req_data->wq_buf, req_data->wq_buf_avail);
+ wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
spin_lock_irqsave(&vpmem->pmem_lock, flags);
}
err1 = virtqueue_kick(vpmem->req_vq);
@@ -98,7 +105,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
err = -EIO;
} else {
/* A host response results in "host_ack" getting called */
- wait_event(req_data->host_acked, req_data->done);
+ wait_event(req_data->host_acked, READ_ONCE(req_data->done));
err = le32_to_cpu(req_data->resp.ret);
}
--
2.52.0
+CC MST
> virtio_pmem_host_ack() reclaims virtqueue descriptors with
> virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
> returned token.
>
> If token completion is skipped for any reason, reclaimed descriptors may
> not wake a waiter and the submitter may sleep forever waiting for a free
> slot.
>
> Always wake one -ENOSPC waiter for each virtqueue completion before
> touching the returned token.
>
> Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
> wq_buf_avail). They are observed by waiters without pmem_lock, so make
> the accesses explicit single loads/stores and avoid compiler
> reordering/caching across the wait/wake paths.
>
> Signed-off-by: Li Chen <me@linux.beauty>
> ---
> drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> index c3f07be4aa22..6f9890361d0b 100644
> --- a/drivers/nvdimm/nd_virtio.c
> +++ b/drivers/nvdimm/nd_virtio.c
> @@ -9,26 +9,33 @@
> #include "virtio_pmem.h"
> #include "nd.h"
>
> +static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
> +{
> + struct virtio_pmem_request *req_buf;
> +
> + if (list_empty(&vpmem->req_list))
> + return;
> +
> + req_buf = list_first_entry(&vpmem->req_list,
> + struct virtio_pmem_request, list);
[...]
> + list_del_init(&req_buf->list);
> + WRITE_ONCE(req_buf->wq_buf_avail, true);
> + wake_up(&req_buf->wq_buf);
It seems that with the above change (a 3-line fix), you are allowing a waiter
to be woken up before the returned token is accessed. Maybe simplify the
patch by keeping just this change as a single patch, and move the other
changes (READ_ONCE/WRITE_ONCE) into a separate patch with a corresponding
commit log.
Thanks,
Pankaj
> +}
> +
> /* The interrupt handler */
> void virtio_pmem_host_ack(struct virtqueue *vq)
> {
> struct virtio_pmem *vpmem = vq->vdev->priv;
> - struct virtio_pmem_request *req_data, *req_buf;
> + struct virtio_pmem_request *req_data;
> unsigned long flags;
> unsigned int len;
>
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
> - req_data->done = true;
> + virtio_pmem_wake_one_waiter(vpmem);
> + WRITE_ONCE(req_data->done, true);
> wake_up(&req_data->host_acked);
> -
> - if (!list_empty(&vpmem->req_list)) {
> - req_buf = list_first_entry(&vpmem->req_list,
> - struct virtio_pmem_request, list);
> - req_buf->wq_buf_avail = true;
> - wake_up(&req_buf->wq_buf);
> - list_del(&req_buf->list);
> - }
> }
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> }
> @@ -58,7 +65,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> if (!req_data)
> return -ENOMEM;
>
> - req_data->done = false;
> + WRITE_ONCE(req_data->done, false);
> init_waitqueue_head(&req_data->host_acked);
> init_waitqueue_head(&req_data->wq_buf);
> INIT_LIST_HEAD(&req_data->list);
> @@ -79,12 +86,12 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> GFP_ATOMIC)) == -ENOSPC) {
>
> dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
> - req_data->wq_buf_avail = false;
> + WRITE_ONCE(req_data->wq_buf_avail, false);
> list_add_tail(&req_data->list, &vpmem->req_list);
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->wq_buf, req_data->wq_buf_avail);
> + wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> }
> err1 = virtqueue_kick(vpmem->req_vq);
> @@ -98,7 +105,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> err = -EIO;
> } else {
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->host_acked, req_data->done);
> + wait_event(req_data->host_acked, READ_ONCE(req_data->done));
> err = le32_to_cpu(req_data->resp.ret);
> }
>
> --
> 2.52.0
>
© 2016 - 2026 Red Hat, Inc.