[RFC v2 4/5] virtiofs: perform DMA operations out of the spinlock

Eugenio Pérez posted 5 patches 9 months, 4 weeks ago
[RFC v2 4/5] virtiofs: perform DMA operations out of the spinlock
Posted by Eugenio Pérez 9 months, 4 weeks ago
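Map the request's scatterlist entries before taking fsvq->lock and submit
them with virtqueue_add_sgs_premapped(), so the DMA mapping no longer
happens under the spinlock. This is useful for setups such as swiotlb or
VDUSE, where DMA operations are expensive and/or need to be performed with
a write lock.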

After applying this patch, the fio read test goes from 1124MiB/s to 1191MiB/s.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 fs/fuse/virtio_fs.c | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 1344c5782a7c..e19c78f2480e 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -836,8 +836,19 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 
 	/* End requests */
 	list_for_each_entry_safe(req, next, &reqs, list) {
+		unsigned int total_sgs = req->out_sgs + req->in_sgs;
+
 		list_del_init(&req->list);
 
+		for (unsigned int i = 0; i < total_sgs; ++i) {
+			enum dma_data_direction dir = (i < req->out_sgs) ?
+				DMA_TO_DEVICE : DMA_FROM_DEVICE;
+			dma_unmap_page(vq->vdev->dev.parent,
+				       sg_dma_address(&req->sg[i]),
+				       sg_dma_len(&req->sg[i]), dir);
+
+		}
+
 		/* blocking async request completes in a worker context */
 		if (req->args->may_block) {
 			struct virtio_fs_req_work *w;
@@ -1426,6 +1437,24 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 		sgs[i] = &req->sg[i];
 	WARN_ON(req->out_sgs + req->in_sgs != total_sgs);
 
+	// TODO can we change this ptr out of the lock?
+	vq = fsvq->vq;
+	// TODO handle this and following errors
+	for (i = 0; i < total_sgs; i++) {
+		struct page *page = sg_page(&req->sg[i]);
+		enum dma_data_direction dir = (i < req->out_sgs) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+		dma_addr_t dma_addr = dma_map_page(vq->vdev->dev.parent, page,
+						   req->sg[i].offset, req->sg[i].length, dir);
+
+		if (dma_mapping_error(vq->vdev->dev.parent, dma_addr)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		sg_dma_address(&req->sg[i]) = dma_addr;
+		sg_dma_len(&req->sg[i]) = req->sg[i].length;
+	}
+
 	spin_lock(&fsvq->lock);
 
 	if (!fsvq->connected) {
@@ -1434,8 +1463,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 		goto out;
 	}
 
-	vq = fsvq->vq;
-	ret = virtqueue_add_sgs(vq, sgs, req->out_sgs, req->in_sgs, req, GFP_ATOMIC);
+	ret = virtqueue_add_sgs_premapped(vq, sgs, req->out_sgs,
+					  req->in_sgs, req, GFP_ATOMIC);
 	if (ret < 0) {
 		spin_unlock(&fsvq->lock);
 		goto out;
@@ -1460,6 +1489,13 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 		virtqueue_notify(vq);
 
 out:
+	for (unsigned int j = 0; ret && j < total_sgs; ++j) {
+		enum dma_data_direction dir = (j < req->out_sgs) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+		dma_unmap_page(vq->vdev->dev.parent,
+			       sg_dma_address(&req->sg[j]),
+			       sg_dma_len(&req->sg[j]), dir);
+	}
 	if (ret < 0 && req->argbuf) {
 		kfree(req->argbuf);
 		req->argbuf = NULL;
-- 
2.48.1

Re: [RFC v2 4/5] virtiofs: perform DMA operations out of the spinlock
Posted by Jason Wang 9 months, 3 weeks ago
On Sat, Feb 22, 2025 at 1:07 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> This is useful for some setups like swiotlb or VDUSE where the DMA
> operations are expensive and/or need to be performed with a write lock.
>
> After applying this patch, fio read test goes from 1124MiB/s to 1191MiB/s.

What FIO parameters did you use? It might be worth trying different
request sizes. The improvement was more noticeable with larger requests
when I tested a similar optimization for virtio-blk.

We also need to test without VDUSE, to make sure there is no regression
in classical setups.

Thanks

>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>  fs/fuse/virtio_fs.c | 40 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 1344c5782a7c..e19c78f2480e 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -836,8 +836,19 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
>
>         /* End requests */
>         list_for_each_entry_safe(req, next, &reqs, list) {
> +               unsigned int total_sgs = req->out_sgs + req->in_sgs;
> +
>                 list_del_init(&req->list);
>
> +               for (unsigned int i = 0; i < total_sgs; ++i) {
> +                       enum dma_data_direction dir = (i < req->out_sgs) ?
> +                               DMA_TO_DEVICE : DMA_FROM_DEVICE;
> +                       dma_unmap_page(vq->vdev->dev.parent,
> +                                      sg_dma_address(&req->sg[i]),
> +                                      sg_dma_len(&req->sg[i]), dir);
> +
> +               }
> +
>                 /* blocking async request completes in a worker context */
>                 if (req->args->may_block) {
>                         struct virtio_fs_req_work *w;
> @@ -1426,6 +1437,24 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>                 sgs[i] = &req->sg[i];
>         WARN_ON(req->out_sgs + req->in_sgs != total_sgs);
>
> +       // TODO can we change this ptr out of the lock?
> +       vq = fsvq->vq;
> +       // TODO handle this and following errors
> +       for (i = 0; i < total_sgs; i++) {
> +               struct page *page = sg_page(&req->sg[i]);
> +               enum dma_data_direction dir = (i < req->out_sgs) ?
> +                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
> +               dma_addr_t dma_addr = dma_map_page(vq->vdev->dev.parent, page,
> +                                                  req->sg[i].offset, req->sg[i].length, dir);
> +
> +               if (dma_mapping_error(vq->vdev->dev.parent, dma_addr)) {
> +                       ret = -ENOMEM;
> +                       goto out;
> +               }
> +               sg_dma_address(&req->sg[i]) = dma_addr;
> +               sg_dma_len(&req->sg[i]) = req->sg[i].length;
> +       }
> +
>         spin_lock(&fsvq->lock);
>
>         if (!fsvq->connected) {
> @@ -1434,8 +1463,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>                 goto out;
>         }
>
> -       vq = fsvq->vq;
> -       ret = virtqueue_add_sgs(vq, sgs, req->out_sgs, req->in_sgs, req, GFP_ATOMIC);
> +       ret = virtqueue_add_sgs_premapped(vq, sgs, req->out_sgs,
> +                                         req->in_sgs, req, GFP_ATOMIC);
>         if (ret < 0) {
>                 spin_unlock(&fsvq->lock);
>                 goto out;
> @@ -1460,6 +1489,13 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>                 virtqueue_notify(vq);
>
>  out:
> +       for (unsigned int j = 0; ret && j < total_sgs; ++j) {
> +               enum dma_data_direction dir = (j < req->out_sgs) ?
> +                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
> +               dma_unmap_page(vq->vdev->dev.parent,
> +                              sg_dma_address(&req->sg[j]),
> +                              sg_dma_len(&req->sg[j]), dir);
> +       }
>         if (ret < 0 && req->argbuf) {
>                 kfree(req->argbuf);
>                 req->argbuf = NULL;
> --
> 2.48.1
>