[PATCH 13/15] fuse: Process requests in coroutines

Hanna Czenczek posted 15 patches 1 week ago
[PATCH 13/15] fuse: Process requests in coroutines
Posted by Hanna Czenczek 1 week ago
Make fuse_process_request() a coroutine_fn (fuse_co_process_request())
and have read_from_fuse_fd() launch it inside of a newly created
coroutine instead of running it synchronously.  This way, we can process
requests in parallel.

These are the benchmark results, compared to (a) the original results
with libfuse, and (b) the results after switching away from libfuse
(i.e. before this patch):

file:                  (vs. libfuse / vs. no libfuse)
  read:
    seq aio:   120.6k ±1.1k (+ 53 % / + 58 %)
    rand aio:  113.3k ±5.9k (+188 % / +325 %)
    seq sync:   52.4k ±0.4k (+ 61 % / + 10 %)
    rand sync:  10.4k ±0.4k (+  6 % / +  3 %)
  write:
    seq aio:    79.8k ±0.8k (+ 29 % / + 37 %)
    rand aio:   79.0k ±0.6k (+ 29 % / + 36 %)
    seq sync:   41.5k ±0.3k (+ 49 % / + 15 %)
    rand sync:  41.4k ±0.2k (+ 50 % / + 15 %)
null:
  read:
    seq aio:   266.1k ±1.5k (+ 24 % / -  1 %)
    rand aio:  264.1k ±2.5k (+ 24 % / ±  0 %)
    seq sync:  135.6k ±3.2k (+ 50 % / +  1 %)
    rand sync: 134.7k ±3.0k (+ 50 % / +  2 %)
  write:
    seq aio:   281.0k ±1.8k (+ 38 % / +  2 %)
    rand aio:  288.1k ±6.1k (+ 43 % / +  6 %)
    seq sync:  142.2k ±3.1k (+ 65 % / +  9 %)
    rand sync: 141.1k ±2.9k (+ 66 % / + 11 %)

So for non-AIO cases (and the null driver, which does not yield), there
is little change; but for file AIO, results greatly improve, resolving
the performance issue we saw before (when switching away from libfuse).

Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/export/fuse.c | 209 ++++++++++++++++++++++++++------------------
 1 file changed, 126 insertions(+), 83 deletions(-)

diff --git a/block/export/fuse.c b/block/export/fuse.c
index 1b399eeab7..345e833171 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -27,6 +27,7 @@
 #include "block/qapi.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block.h"
+#include "qemu/coroutine.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "system/block-backend.h"
@@ -86,6 +87,12 @@ typedef struct FuseExport {
     gid_t st_gid;
 } FuseExport;
 
+/* Parameters to the request processing coroutine */
+typedef struct FuseRequestCoParam {
+    FuseExport *exp;
+    int got_request;
+} FuseRequestCoParam;
+
 static GHashTable *exports;
 
 static void fuse_export_shutdown(BlockExport *exp);
@@ -100,7 +107,7 @@ static bool is_regular_file(const char *path, Error **errp);
 
 static bool poll_fuse_fd(void *opaque);
 static void read_fuse_fd(void *opaque);
-static void fuse_process_request(FuseExport *exp);
+static void coroutine_fn fuse_co_process_request(FuseExport *exp);
 
 static void fuse_inc_in_flight(FuseExport *exp)
 {
@@ -350,12 +357,20 @@ static int mount_fuse_export(FuseExport *exp, Error **errp)
 
 /**
  * Try to read a single request from the FUSE FD.
- * If a request is available, process it, and return true.
- * Otherwise, return false.
+ * Takes a FuseRequestCoParam object pointer in `opaque`.
+ *
+ * If a request is available, process it, and set FuseRequestCoParam.got_request
+ * to 1.  Otherwise, set it to 0.
+ * (Not using a boolean allows callers to initialize it e.g. with -EINPROGRESS.)
+ *
+ * The FuseRequestCoParam object is only accessed until yielding for the first
+ * time, i.e. may be dropped by the caller after running the coroutine until it
+ * yields.
  */
-static bool read_from_fuse_fd(void *opaque)
+static void coroutine_fn co_read_from_fuse_fd(void *opaque)
 {
-    FuseExport *exp = opaque;
+    FuseRequestCoParam *co_param = opaque;
+    FuseExport *exp = co_param->exp;
     int fuse_fd = exp->fuse_fd;
     ssize_t ret;
     const struct fuse_in_header *in_hdr;
@@ -396,13 +411,15 @@ static bool read_from_fuse_fd(void *opaque)
         goto no_request;
     }
 
-    fuse_process_request(exp);
+    /* Must set this before yielding */
+    co_param->got_request = 1;
+    fuse_co_process_request(exp);
     fuse_dec_in_flight(exp);
-    return true;
+    return;
 
 no_request:
+    co_param->got_request = 0;
     fuse_dec_in_flight(exp);
-    return false;
 }
 
 /**
@@ -413,7 +430,17 @@ no_request:
  */
 static bool poll_fuse_fd(void *opaque)
 {
-    return read_from_fuse_fd(opaque);
+    Coroutine *co;
+    FuseRequestCoParam co_param = {
+        .exp = opaque,
+        .got_request = -EINPROGRESS,
+    };
+
+    co = qemu_coroutine_create(co_read_from_fuse_fd, &co_param);
+    qemu_coroutine_enter(co);
+    assert(co_param.got_request != -EINPROGRESS);
+
+    return co_param.got_request;
 }
 
 /**
@@ -422,7 +449,15 @@ static bool poll_fuse_fd(void *opaque)
  */
 static void read_fuse_fd(void *opaque)
 {
-    read_from_fuse_fd(opaque);
+    Coroutine *co;
+    FuseRequestCoParam co_param = {
+        .exp = opaque,
+        .got_request = -EINPROGRESS,
+    };
+
+    co = qemu_coroutine_create(co_read_from_fuse_fd, &co_param);
+    qemu_coroutine_enter(co);
+    assert(co_param.got_request != -EINPROGRESS);
 }
 
 static void fuse_export_shutdown(BlockExport *blk_exp)
@@ -498,8 +533,9 @@ static bool is_regular_file(const char *path, Error **errp)
  * Process FUSE INIT.
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_init(FuseExport *exp, struct fuse_init_out *out,
-                         uint32_t max_readahead, uint32_t flags)
+static ssize_t coroutine_fn
+fuse_co_init(FuseExport *exp, struct fuse_init_out *out,
+             uint32_t max_readahead, uint32_t flags)
 {
     const uint32_t supported_flags = FUSE_ASYNC_READ | FUSE_ASYNC_DIO;
 
@@ -538,17 +574,18 @@ static ssize_t fuse_init(FuseExport *exp, struct fuse_init_out *out,
  * Let clients get file attributes (i.e., stat() the file).
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_getattr(FuseExport *exp, struct fuse_attr_out *out)
+static ssize_t coroutine_fn
+fuse_co_getattr(FuseExport *exp, struct fuse_attr_out *out)
 {
     int64_t length, allocated_blocks;
     time_t now = time(NULL);
 
-    length = blk_getlength(exp->common.blk);
+    length = blk_co_getlength(exp->common.blk);
     if (length < 0) {
         return length;
     }
 
-    allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
+    allocated_blocks = bdrv_co_get_allocated_file_size(blk_bs(exp->common.blk));
     if (allocated_blocks <= 0) {
         allocated_blocks = DIV_ROUND_UP(length, 512);
     } else {
@@ -575,8 +612,9 @@ static ssize_t fuse_getattr(FuseExport *exp, struct fuse_attr_out *out)
     return sizeof(*out);
 }
 
-static int fuse_do_truncate(const FuseExport *exp, int64_t size,
-                            bool req_zero_write, PreallocMode prealloc)
+static int coroutine_fn
+fuse_co_do_truncate(const FuseExport *exp, int64_t size, bool req_zero_write,
+                    PreallocMode prealloc)
 {
     uint64_t blk_perm, blk_shared_perm;
     BdrvRequestFlags truncate_flags = 0;
@@ -605,8 +643,8 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
         }
     }
 
-    ret = blk_truncate(exp->common.blk, size, true, prealloc,
-                       truncate_flags, NULL);
+    ret = blk_co_truncate(exp->common.blk, size, true, prealloc,
+                          truncate_flags, NULL);
 
     if (add_resize_perm) {
         /* Must succeed, because we are only giving up the RESIZE permission */
@@ -627,9 +665,9 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
  * they cannot be given non-owner access.
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_setattr(FuseExport *exp, struct fuse_attr_out *out,
-                            uint32_t to_set, uint64_t size, uint32_t mode,
-                            uint32_t uid, uint32_t gid)
+static ssize_t coroutine_fn
+fuse_co_setattr(FuseExport *exp, struct fuse_attr_out *out, uint32_t to_set,
+                uint64_t size, uint32_t mode, uint32_t uid, uint32_t gid)
 {
     int supported_attrs;
     int ret;
@@ -666,7 +704,7 @@ static ssize_t fuse_setattr(FuseExport *exp, struct fuse_attr_out *out,
             return -EACCES;
         }
 
-        ret = fuse_do_truncate(exp, size, true, PREALLOC_MODE_OFF);
+        ret = fuse_co_do_truncate(exp, size, true, PREALLOC_MODE_OFF);
         if (ret < 0) {
             return ret;
         }
@@ -685,7 +723,7 @@ static ssize_t fuse_setattr(FuseExport *exp, struct fuse_attr_out *out,
         exp->st_gid = gid;
     }
 
-    return fuse_getattr(exp, out);
+    return fuse_co_getattr(exp, out);
 }
 
 /**
@@ -693,7 +731,8 @@ static ssize_t fuse_setattr(FuseExport *exp, struct fuse_attr_out *out,
  * just acknowledge the request.
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_open(FuseExport *exp, struct fuse_open_out *out)
+static ssize_t coroutine_fn
+fuse_co_open(FuseExport *exp, struct fuse_open_out *out)
 {
     *out = (struct fuse_open_out) {
         .open_flags = FOPEN_DIRECT_IO | FOPEN_PARALLEL_DIRECT_WRITES,
@@ -706,8 +745,8 @@ static ssize_t fuse_open(FuseExport *exp, struct fuse_open_out *out)
  * data from the block device into that buffer.
  * Returns the buffer (read) size on success, and -errno on error.
  */
-static ssize_t fuse_read(FuseExport *exp, void **bufptr,
-                         uint64_t offset, uint32_t size)
+static ssize_t coroutine_fn
+fuse_co_read(FuseExport *exp, void **bufptr, uint64_t offset, uint32_t size)
 {
     int64_t blk_len;
     void *buf;
@@ -722,7 +761,7 @@ static ssize_t fuse_read(FuseExport *exp, void **bufptr,
      * Clients will expect short reads at EOF, so we have to limit
      * offset+size to the image length.
      */
-    blk_len = blk_getlength(exp->common.blk);
+    blk_len = blk_co_getlength(exp->common.blk);
     if (blk_len < 0) {
         return blk_len;
     }
@@ -736,7 +775,7 @@ static ssize_t fuse_read(FuseExport *exp, void **bufptr,
         return -ENOMEM;
     }
 
-    ret = blk_pread(exp->common.blk, offset, size, buf, 0);
+    ret = blk_co_pread(exp->common.blk, offset, size, buf, 0);
     if (ret < 0) {
         qemu_vfree(buf);
         return ret;
@@ -748,11 +787,12 @@ static ssize_t fuse_read(FuseExport *exp, void **bufptr,
 
 /**
  * Handle client writes to the exported image.  @buf has the data to be written
- * and will be copied to a bounce buffer before polling for the first time.
+ * and will be copied to a bounce buffer before yielding for the first time.
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_write(FuseExport *exp, struct fuse_write_out *out,
-                          uint64_t offset, uint32_t size, const void *buf)
+static ssize_t coroutine_fn
+fuse_co_write(FuseExport *exp, struct fuse_write_out *out,
+              uint64_t offset, uint32_t size, const void *buf)
 {
     void *copied;
     int64_t blk_len;
@@ -767,7 +807,7 @@ static ssize_t fuse_write(FuseExport *exp, struct fuse_write_out *out,
         return -EACCES;
     }
 
-    /* Must copy to bounce buffer before polling (to allow nesting) */
+    /* Must copy to bounce buffer before potentially yielding */
     copied = blk_blockalign(exp->common.blk, size);
     memcpy(copied, buf, size);
 
@@ -775,7 +815,7 @@ static ssize_t fuse_write(FuseExport *exp, struct fuse_write_out *out,
      * Clients will expect short writes at EOF, so we have to limit
      * offset+size to the image length.
      */
-    blk_len = blk_getlength(exp->common.blk);
+    blk_len = blk_co_getlength(exp->common.blk);
     if (blk_len < 0) {
         ret = blk_len;
         goto fail_free_buffer;
@@ -783,7 +823,8 @@ static ssize_t fuse_write(FuseExport *exp, struct fuse_write_out *out,
 
     if (offset + size > blk_len) {
         if (exp->growable) {
-            ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
+            ret = fuse_co_do_truncate(exp, offset + size, true,
+                                      PREALLOC_MODE_OFF);
             if (ret < 0) {
                 goto fail_free_buffer;
             }
@@ -792,7 +833,7 @@ static ssize_t fuse_write(FuseExport *exp, struct fuse_write_out *out,
         }
     }
 
-    ret = blk_pwrite(exp->common.blk, offset, size, copied, 0);
+    ret = blk_co_pwrite(exp->common.blk, offset, size, copied, 0);
     if (ret < 0) {
         goto fail_free_buffer;
     }
@@ -813,8 +854,9 @@ fail_free_buffer:
  * Let clients perform various fallocate() operations.
  * Return 0 on success (no 'out' object), and -errno on error.
  */
-static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
-                              uint32_t mode)
+static ssize_t coroutine_fn
+fuse_co_fallocate(FuseExport *exp,
+                  uint64_t offset, uint64_t length, uint32_t mode)
 {
     int64_t blk_len;
     int ret;
@@ -823,7 +865,7 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
         return -EACCES;
     }
 
-    blk_len = blk_getlength(exp->common.blk);
+    blk_len = blk_co_getlength(exp->common.blk);
     if (blk_len < 0) {
         return blk_len;
     }
@@ -842,14 +884,14 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
 
         if (offset > blk_len) {
             /* No preallocation needed here */
-            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+            ret = fuse_co_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
             if (ret < 0) {
                 return ret;
             }
         }
 
-        ret = fuse_do_truncate(exp, offset + length, true,
-                               PREALLOC_MODE_FALLOC);
+        ret = fuse_co_do_truncate(exp, offset + length, true,
+                                  PREALLOC_MODE_FALLOC);
     }
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
     else if (mode & FALLOC_FL_PUNCH_HOLE) {
@@ -860,8 +902,9 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
         do {
             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 
-            ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
-                                    BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
+            ret = blk_co_pwrite_zeroes(exp->common.blk, offset, size,
+                                       BDRV_REQ_MAY_UNMAP |
+                                       BDRV_REQ_NO_FALLBACK);
             if (ret == -ENOTSUP) {
                 /*
                  * fallocate() specifies to return EOPNOTSUPP for unsupported
@@ -879,8 +922,8 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
     else if (mode & FALLOC_FL_ZERO_RANGE) {
         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
             /* No need for zeroes, we are going to write them ourselves */
-            ret = fuse_do_truncate(exp, offset + length, false,
-                                   PREALLOC_MODE_OFF);
+            ret = fuse_co_do_truncate(exp, offset + length, false,
+                                      PREALLOC_MODE_OFF);
             if (ret < 0) {
                 return ret;
             }
@@ -889,8 +932,8 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
         do {
             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
 
-            ret = blk_pwrite_zeroes(exp->common.blk,
-                                    offset, size, 0);
+            ret = blk_co_pwrite_zeroes(exp->common.blk,
+                                       offset, size, 0);
             offset += size;
             length -= size;
         } while (ret == 0 && length > 0);
@@ -907,9 +950,9 @@ static ssize_t fuse_fallocate(FuseExport *exp, uint64_t offset, uint64_t length,
  * Let clients fsync the exported image.
  * Return 0 on success (no 'out' object), and -errno on error.
  */
-static ssize_t fuse_fsync(FuseExport *exp)
+static ssize_t coroutine_fn fuse_co_fsync(FuseExport *exp)
 {
-    return blk_flush(exp->common.blk);
+    return blk_co_flush(exp->common.blk);
 }
 
 /**
@@ -917,9 +960,9 @@ static ssize_t fuse_fsync(FuseExport *exp)
  * notes this to be a way to return last-minute errors.)
  * Return 0 on success (no 'out' object), and -errno on error.
  */
-static ssize_t fuse_flush(FuseExport *exp)
+static ssize_t coroutine_fn fuse_co_flush(FuseExport *exp)
 {
-    return blk_flush(exp->common.blk);
+    return blk_co_flush(exp->common.blk);
 }
 
 #ifdef CONFIG_FUSE_LSEEK
@@ -927,8 +970,9 @@ static ssize_t fuse_flush(FuseExport *exp)
  * Let clients inquire allocation status.
  * Return the number of bytes written to *out on success, and -errno on error.
  */
-static ssize_t fuse_lseek(FuseExport *exp, struct fuse_lseek_out *out,
-                          uint64_t offset, uint32_t whence)
+static ssize_t coroutine_fn
+fuse_co_lseek(FuseExport *exp, struct fuse_lseek_out *out,
+              uint64_t offset, uint32_t whence)
 {
     if (whence != SEEK_HOLE && whence != SEEK_DATA) {
         return -EINVAL;
@@ -938,8 +982,8 @@ static ssize_t fuse_lseek(FuseExport *exp, struct fuse_lseek_out *out,
         int64_t pnum;
         int ret;
 
-        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
-                                      offset, INT64_MAX, &pnum, NULL, NULL);
+        ret = bdrv_co_block_status_above(blk_bs(exp->common.blk), NULL,
+                                         offset, INT64_MAX, &pnum, NULL, NULL);
         if (ret < 0) {
             return ret;
         }
@@ -956,7 +1000,7 @@ static ssize_t fuse_lseek(FuseExport *exp, struct fuse_lseek_out *out,
              * and @blk_len (the client-visible EOF).
              */
 
-            blk_len = blk_getlength(exp->common.blk);
+            blk_len = blk_co_getlength(exp->common.blk);
             if (blk_len < 0) {
                 return blk_len;
             }
@@ -1091,14 +1135,14 @@ static int fuse_write_buf_response(int fd, uint32_t req_id,
 }
 
 /*
- * For use in fuse_process_request():
+ * For use in fuse_co_process_request():
  * Returns a pointer to the parameter object for the given operation (inside of
  * exp->request_buf, which is assumed to hold a fuse_in_header first).
  * Verifies that the object is complete (exp->request_buf is large enough to
  * hold it in one piece, and the request length includes the whole object).
  *
- * Note that exp->request_buf may be overwritten after polling, so the returned
- * pointer must not be used across a function that may poll!
+ * Note that exp->request_buf may be overwritten after yielding, so the returned
+ * pointer must not be used across a function that may yield!
  */
 #define FUSE_IN_OP_STRUCT(op_name, export) \
     ({ \
@@ -1122,7 +1166,7 @@ static int fuse_write_buf_response(int fd, uint32_t req_id,
     })
 
 /*
- * For use in fuse_process_request():
+ * For use in fuse_co_process_request():
  * Returns a pointer to the return object for the given operation (inside of
  * out_buf, which is assumed to hold a fuse_out_header first).
  * Verifies that out_buf is large enough to hold the whole object.
@@ -1145,12 +1189,11 @@ static int fuse_write_buf_response(int fd, uint32_t req_id,
 /**
  * Process a FUSE request, incl. writing the response.
  *
- * Note that polling in any request-processing function can lead to a nested
- * read_from_fuse_fd() call, which will overwrite the contents of
- * exp->request_buf.  Anything that takes a buffer needs to take care that the
- * content is copied before potentially polling.
+ * Note that yielding in any request-processing function can overwrite the
+ * contents of exp->request_buf.  Anything that takes a buffer needs to take
+ * care that the content is copied before yielding.
  */
-static void fuse_process_request(FuseExport *exp)
+static void coroutine_fn fuse_co_process_request(FuseExport *exp)
 {
     uint32_t opcode;
     uint64_t req_id;
@@ -1171,7 +1214,7 @@ static void fuse_process_request(FuseExport *exp)
     void *out_data_buffer = NULL;
     ssize_t ret;
 
-    /* Limit scope to ensure pointer is no longer used after polling */
+    /* Limit scope to ensure pointer is no longer used after yielding */
     {
         const struct fuse_in_header *in_hdr =
             (const struct fuse_in_header *)exp->request_buf;
@@ -1183,13 +1226,13 @@ static void fuse_process_request(FuseExport *exp)
     switch (opcode) {
     case FUSE_INIT: {
         const struct fuse_init_in *in = FUSE_IN_OP_STRUCT(init, exp);
-        ret = fuse_init(exp, FUSE_OUT_OP_STRUCT(init, out_buf),
-                        in->max_readahead, in->flags);
+        ret = fuse_co_init(exp, FUSE_OUT_OP_STRUCT(init, out_buf),
+                           in->max_readahead, in->flags);
         break;
     }
 
     case FUSE_OPEN:
-        ret = fuse_open(exp, FUSE_OUT_OP_STRUCT(open, out_buf));
+        ret = fuse_co_open(exp, FUSE_OUT_OP_STRUCT(open, out_buf));
         break;
 
     case FUSE_RELEASE:
@@ -1201,19 +1244,19 @@ static void fuse_process_request(FuseExport *exp)
         break;
 
     case FUSE_GETATTR:
-        ret = fuse_getattr(exp, FUSE_OUT_OP_STRUCT(attr, out_buf));
+        ret = fuse_co_getattr(exp, FUSE_OUT_OP_STRUCT(attr, out_buf));
         break;
 
     case FUSE_SETATTR: {
         const struct fuse_setattr_in *in = FUSE_IN_OP_STRUCT(setattr, exp);
-        ret = fuse_setattr(exp, FUSE_OUT_OP_STRUCT(attr, out_buf),
-                           in->valid, in->size, in->mode, in->uid, in->gid);
+        ret = fuse_co_setattr(exp, FUSE_OUT_OP_STRUCT(attr, out_buf),
+                              in->valid, in->size, in->mode, in->uid, in->gid);
         break;
     }
 
     case FUSE_READ: {
         const struct fuse_read_in *in = FUSE_IN_OP_STRUCT(read, exp);
-        ret = fuse_read(exp, &out_data_buffer, in->offset, in->size);
+        ret = fuse_co_read(exp, &out_data_buffer, in->offset, in->size);
         break;
     }
 
@@ -1241,33 +1284,33 @@ static void fuse_process_request(FuseExport *exp)
 
         /*
          * Passing a pointer to `in` (i.e. the request buffer) is fine because
-         * fuse_write() takes care to copy its contents before potentially
-         * polling.
+         * fuse_co_write() takes care to copy its contents before potentially
+         * yielding.
          */
-        ret = fuse_write(exp, FUSE_OUT_OP_STRUCT(write, out_buf),
-                         in->offset, in->size, in + 1);
+        ret = fuse_co_write(exp, FUSE_OUT_OP_STRUCT(write, out_buf),
+                            in->offset, in->size, in + 1);
         break;
     }
 
     case FUSE_FALLOCATE: {
         const struct fuse_fallocate_in *in = FUSE_IN_OP_STRUCT(fallocate, exp);
-        ret = fuse_fallocate(exp, in->offset, in->length, in->mode);
+        ret = fuse_co_fallocate(exp, in->offset, in->length, in->mode);
         break;
     }
 
     case FUSE_FSYNC:
-        ret = fuse_fsync(exp);
+        ret = fuse_co_fsync(exp);
         break;
 
     case FUSE_FLUSH:
-        ret = fuse_flush(exp);
+        ret = fuse_co_flush(exp);
         break;
 
 #ifdef CONFIG_FUSE_LSEEK
     case FUSE_LSEEK: {
         const struct fuse_lseek_in *in = FUSE_IN_OP_STRUCT(lseek, exp);
-        ret = fuse_lseek(exp, FUSE_OUT_OP_STRUCT(lseek, out_buf),
-                         in->offset, in->whence);
+        ret = fuse_co_lseek(exp, FUSE_OUT_OP_STRUCT(lseek, out_buf),
+                            in->offset, in->whence);
         break;
     }
 #endif
-- 
2.48.1


Re: [PATCH 13/15] fuse: Process requests in coroutines
Posted by Stefan Hajnoczi 5 days, 21 hours ago
On Tue, Mar 25, 2025 at 05:06:53PM +0100, Hanna Czenczek wrote:
> Make fuse_process_request() a coroutine_fn (fuse_co_process_request())
> and have read_from_fuse_fd() launch it inside of a newly created
> coroutine instead of running it synchronously.  This way, we can process
> requests in parallel.
> 
> These are the benchmark results, compared to (a) the original results
> with libfuse, and (b) the results after switching away from libfuse
> (i.e. before this patch):
> 
> file:                  (vs. libfuse / vs. no libfuse)
>   read:
>     seq aio:   120.6k ±1.1k (+ 53 % / + 58 %)
>     rand aio:  113.3k ±5.9k (+188 % / +325 %)
>     seq sync:   52.4k ±0.4k (+ 61 % / + 10 %)
>     rand sync:  10.4k ±0.4k (+  6 % / +  3 %)
>   write:
>     seq aio:    79.8k ±0.8k (+ 29 % / + 37 %)
>     rand aio:   79.0k ±0.6k (+ 29 % / + 36 %)
>     seq sync:   41.5k ±0.3k (+ 49 % / + 15 %)
>     rand sync:  41.4k ±0.2k (+ 50 % / + 15 %)
> null:
>   read:
>     seq aio:   266.1k ±1.5k (+ 24 % / -  1 %)
>     rand aio:  264.1k ±2.5k (+ 24 % / ±  0 %)
>     seq sync:  135.6k ±3.2k (+ 50 % / +  1 %)
>     rand sync: 134.7k ±3.0k (+ 50 % / +  2 %)
>   write:
>     seq aio:   281.0k ±1.8k (+ 38 % / +  2 %)
>     rand aio:  288.1k ±6.1k (+ 43 % / +  6 %)
>     seq sync:  142.2k ±3.1k (+ 65 % / +  9 %)
>     rand sync: 141.1k ±2.9k (+ 66 % / + 11 %)
> 
> So for non-AIO cases (and the null driver, which does not yield), there
> is little change; but for file AIO, results greatly improve, resolving
> the performance issue we saw before (when switching away from libfuse).
> 
> Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
> ---
>  block/export/fuse.c | 209 ++++++++++++++++++++++++++------------------
>  1 file changed, 126 insertions(+), 83 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>