From: Horst Birthelmer <hbirthelmer@ddn.com>
For a FUSE_COMPOUND we add a small header that informs the
fuse server how much buffer memory the kernel has for the result.
This will make the interpretation in libfuse easier,
since we can preallocate the whole result and work on the return
buffer.
Then we append the requests that belong to this compound.
The API for the compound command has:
fuse_compound_alloc()
fuse_compound_add()
fuse_compound_send()
fuse_compound_free()
Signed-off-by: Horst Birthelmer <hbirthelmer@ddn.com>
---
fs/fuse/Makefile | 2 +-
fs/fuse/compound.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++
fs/fuse/fuse_i.h | 39 ++++++
include/uapi/linux/fuse.h | 52 ++++++++
4 files changed, 400 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 22ad9538dfc4b80c6d9b52235bdfead6a6567ae4..4c09038ef995d1b9133c2b6871b97b280a4693b0 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-y := trace.o # put trace.o first so we see ftrace errors sooner
-fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o compound.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o
diff --git a/fs/fuse/compound.c b/fs/fuse/compound.c
new file mode 100644
index 0000000000000000000000000000000000000000..68f30123f39b244dd82b835717077cc271518e14
--- /dev/null
+++ b/fs/fuse/compound.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2025-2026
+ *
+ * Compound operations for FUSE - batch multiple operations into a single
+ * request to reduce round trips between kernel and userspace.
+ */
+
+#include "fuse_i.h"
+
+/*
+ * Allocate a compound request with room for up to @max_count operations.
+ *
+ * The request remembers @fm and stores @flags in the compound header.  The
+ * per-operation arrays (errors, args pointers, converters) are allocated
+ * zero-filled.
+ *
+ * Returns the new request, or NULL if @max_count is 0 or any allocation
+ * fails.  Release with fuse_compound_free().
+ */
+struct fuse_compound_req *fuse_compound_alloc(struct fuse_mount *fm,
+					      u32 max_count, u32 flags)
+{
+	struct fuse_compound_req *req;
+
+	if (!max_count)
+		return NULL;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return NULL;
+
+	/* kzalloc() already left count at 0 and the array pointers NULL */
+	req->fm = fm;
+	req->max_count = max_count;
+	req->compound_header.flags = flags;
+
+	req->op_errors = kcalloc(max_count, sizeof(*req->op_errors),
+				 GFP_KERNEL);
+	if (!req->op_errors)
+		goto fail;
+
+	req->op_args = kcalloc(max_count, sizeof(*req->op_args), GFP_KERNEL);
+	if (!req->op_args)
+		goto fail;
+
+	req->op_converters = kcalloc(max_count, sizeof(*req->op_converters),
+				     GFP_KERNEL);
+	if (!req->op_converters)
+		goto fail;
+
+	return req;
+
+fail:
+	/* kfree(NULL) is a no-op, so arrays not yet allocated are harmless */
+	kfree(req->op_args);
+	kfree(req->op_errors);
+	kfree(req);
+	return NULL;
+}
+
+/*
+ * Release a compound request obtained from fuse_compound_alloc().
+ *
+ * Frees the per-operation arrays and the request itself.  The fuse_args
+ * registered through fuse_compound_add() are only stored as pointers and
+ * are not freed here.  A NULL @compound is ignored, mirroring kfree(), so
+ * callers can use this unconditionally on their error paths.
+ */
+void fuse_compound_free(struct fuse_compound_req *compound)
+{
+	if (!compound)
+		return;
+
+	kfree(compound->op_errors);
+	kfree(compound->op_args);
+	kfree(compound->op_converters);
+	kfree(compound);
+}
+
+/*
+ * Append one operation to a compound request.
+ *
+ * @compound:  request created by fuse_compound_alloc()
+ * @args:      arguments of the operation; only the pointer is stored, so
+ *             @args must stay valid until the compound has been sent
+ * @converter: optional callback (may be NULL) invoked with the compound and
+ *             the operation index before the operation is serialised or
+ *             sent on its own
+ *
+ * Returns 0 on success, or -EINVAL if @compound or @args is NULL, the
+ * request is already full, or @args uses in_pages.
+ */
+int fuse_compound_add(struct fuse_compound_req *compound,
+		      struct fuse_args *args,
+		      int (*converter)(struct fuse_compound_req *compound,
+				       unsigned int index))
+{
+	if (!compound || !args)
+		return -EINVAL;
+
+	if (compound->count >= compound->max_count)
+		return -EINVAL;
+
+	/*
+	 * The compound buffer is built by copying in_args[].value only, so an
+	 * operation that sets in_pages cannot be carried.  Passing one is a
+	 * caller bug, hence the warning.
+	 */
+	if (WARN_ON(args->in_pages))
+		return -EINVAL;
+
+	compound->op_args[compound->count] = args;
+	compound->op_converters[compound->count] = converter;
+	compound->count++;
+	return 0;
+}
+
+/*
+ * Scatter the payload of one sub-response into the out args of @args.
+ *
+ * @resp points just past the sub-response's fuse_out_header.  Each out arg
+ * receives exactly its declared size, taken back to back from @resp.  No
+ * bounds checking is done here.
+ */
+static void fuse_copy_resp_data_per_req(const struct fuse_args *args,
+					char *resp)
+{
+	const struct fuse_arg *out = args->out_args;
+	const struct fuse_arg *end = out + args->out_numargs;
+
+	for (; out < end; out++) {
+		memcpy(out->value, resp, out->size);
+		resp += out->size;
+	}
+}
+
+/*
+ * Decode one sub-response located at @response.
+ *
+ * @compound:     compound request the response belongs to
+ * @response:     start of the sub-response (a fuse_out_header)
+ * @response_end: one past the last byte received from the server
+ * @op_count:     index of the operation this sub-response answers
+ *
+ * A non-zero header error is stored in op_errors[@op_count]; otherwise the
+ * payload is copied into the operation's out args.
+ *
+ * Returns the position of the next sub-response, or NULL if the data is
+ * malformed: truncated header, length running past @response_end, more
+ * sub-responses than operations were added, or a successful sub-response
+ * whose payload is shorter than the operation's out args.
+ */
+static char *fuse_compound_parse_one_op(struct fuse_compound_req *compound,
+					char *response,
+					char *response_end,
+					int op_count)
+{
+	struct fuse_out_header *op_hdr = (struct fuse_out_header *)response;
+	size_t remaining = response_end - response;
+	size_t expected = 0;
+	struct fuse_args *args;
+	unsigned int i;
+
+	/* The header itself must be fully inside the received data */
+	if (remaining < sizeof(*op_hdr))
+		return NULL;
+
+	/* Compare sizes, not pointers, so a huge len cannot wrap around */
+	if (op_hdr->len < sizeof(*op_hdr) || op_hdr->len > remaining)
+		return NULL;
+
+	/* Slots at or beyond count hold no fuse_args (NULL from kcalloc) */
+	if (op_count < 0 || (unsigned int)op_count >= compound->count)
+		return NULL;
+
+	if (op_hdr->error) {
+		compound->op_errors[op_count] = op_hdr->error;
+	} else {
+		args = compound->op_args[op_count];
+
+		/*
+		 * The copy helper consumes the full declared size of every
+		 * out arg, so a shorter payload would be read past its end.
+		 */
+		for (i = 0; i < args->out_numargs; i++)
+			expected += args->out_args[i].size;
+		if (op_hdr->len - sizeof(*op_hdr) < expected)
+			return NULL;
+
+		fuse_copy_resp_data_per_req(args,
+					    response + sizeof(*op_hdr));
+	}
+
+	/* In case of error, we still need to advance to the next op */
+	return response + op_hdr->len;
+}
+
+/*
+ * Walk the concatenated sub-responses in [@response, @response_end) and
+ * decode them one after another, numbering them from 0.
+ *
+ * Returns 0 once the whole range has been consumed, or -EIO as soon as a
+ * sub-response cannot be decoded.
+ */
+static int fuse_compound_parse_resp(struct fuse_compound_req *compound,
+				    char *response, char *response_end)
+{
+	int idx;
+
+	for (idx = 0; response < response_end; idx++) {
+		response = fuse_compound_parse_one_op(compound, response,
+						      response_end, idx);
+		if (!response)
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate the overall size of a compound reply and decode it.
+ *
+ * The payload and its size are taken from out arg 1 of @args.  The reply
+ * is rejected with -EIO if it is larger than the announced result size
+ * plus one fuse_out_header per operation; otherwise the result of
+ * fuse_compound_parse_resp() is returned.
+ */
+static int fuse_handle_compound_results(struct fuse_compound_req *compound,
+					struct fuse_args *args)
+{
+	char *payload = args->out_args[1].value;
+	size_t received = args->out_args[1].size;
+	size_t limit = compound->compound_header.result_size +
+		       compound->count * sizeof(struct fuse_out_header);
+
+	/* FUSE server sent more data than expected */
+	if (received > limit)
+		return -EIO;
+
+	return fuse_compound_parse_resp(compound, payload, payload + received);
+}
+
+/*
+ * Serialise one operation into the compound input buffer.
+ *
+ * Writes a fuse_in_header (unique = @index, opcode and nodeid taken from
+ * @op_args, len = header plus all in args) at @buffer_pos, followed by the
+ * in args back to back.  The remaining header fields are not written here.
+ * @fc is currently unused.
+ *
+ * Returns the buffer position just past the serialised operation.
+ */
+static char *fuse_compound_build_one_op(struct fuse_conn *fc,
+					struct fuse_args *op_args,
+					char *buffer_pos,
+					unsigned int index)
+{
+	struct fuse_in_header *hdr = (struct fuse_in_header *)buffer_pos;
+	char *cursor = buffer_pos + sizeof(*hdr);
+	size_t total = sizeof(*hdr);
+	int n;
+
+	hdr->unique = index;
+	hdr->opcode = op_args->opcode;
+	hdr->nodeid = op_args->nodeid;
+
+	for (n = 0; n < op_args->in_numargs; n++) {
+		size_t len = op_args->in_args[n].size;
+
+		memcpy(cursor, op_args->in_args[n].value, len);
+		cursor += len;
+		total += len;
+	}
+
+	/* Header length covers the header itself plus every in arg */
+	hdr->len = total;
+
+	return cursor;
+}
+
+/*
+ * Send every operation of @compound as an individual request.
+ *
+ * For each operation the optional converter runs first; if it fails, the
+ * operation is not sent and the converter's error is treated as the
+ * operation's result.  Failures are recorded in op_errors[].  Processing
+ * stops at the first failure unless FUSE_COMPOUND_CONTINUE is set in the
+ * compound flags.
+ *
+ * Returns the result of the last operation that was attempted (0 if the
+ * compound is empty).
+ */
+static ssize_t fuse_compound_fallback_separate(struct fuse_compound_req *compound)
+{
+	bool keep_going = compound->compound_header.flags &
+			  FUSE_COMPOUND_CONTINUE;
+	unsigned int req_count = compound->count;
+	ssize_t ret = 0;
+	unsigned int i;
+
+	/*
+	 * A partially parsed compound reply may have left per-op errors
+	 * behind; they do not describe the requests sent below.
+	 */
+	memset(compound->op_errors, 0,
+	       req_count * sizeof(*compound->op_errors));
+
+	/* Try separate requests */
+	for (i = 0; i < req_count; i++) {
+		ret = 0;
+
+		/* fill the current args from the already received responses */
+		if (compound->op_converters[i])
+			ret = compound->op_converters[i](compound, i);
+
+		/* Only send the op if its args could be prepared */
+		if (ret >= 0)
+			ret = fuse_simple_request(compound->fm,
+						  compound->op_args[i]);
+
+		if (ret < 0) {
+			compound->op_errors[i] = ret;
+			if (!keep_going)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Send all operations added to @compound as one FUSE_COMPOUND request.
+ *
+ * The operations are serialised into a single input buffer that follows
+ * the compound header, and the reply is decoded back into the out args of
+ * the individual operations.  If the request fails or the reply cannot be
+ * decoded, the operations are sent one by one instead, unless
+ * FUSE_COMPOUND_ATOMIC is set in the compound flags.
+ *
+ * Returns 0 when the compound reply was decoded, -ENOMEM if a buffer could
+ * not be allocated, a converter's negative error, or otherwise the result
+ * of the failed compound path or of the fallback.
+ */
+ssize_t fuse_compound_send(struct fuse_compound_req *compound)
+{
+ struct fuse_conn *fc = compound->fm->fc;
+ struct fuse_args args = {
+ .opcode = FUSE_COMPOUND,
+ .in_numargs = 2,
+ .out_numargs = 2,
+ .out_argvar = true,
+ };
+ unsigned int req_count = compound->count;
+ size_t total_expected_out_size = 0;
+ size_t buffer_size = 0;
+ void *resp_payload_buffer;
+ char *buffer_pos;
+ void *buffer = NULL;
+ ssize_t ret;
+ unsigned int i, j;
+
+ /*
+ * Pass 1: size the input buffer (one fuse_in_header plus the in args
+ * per op) and add up the out arg sizes of all ops.
+ */
+ for (i = 0; i < req_count; i++) {
+ struct fuse_args *op_args = compound->op_args[i];
+ size_t needed_size = sizeof(struct fuse_in_header);
+
+ for (j = 0; j < op_args->in_numargs; j++)
+ needed_size += op_args->in_args[j].size;
+
+ buffer_size += needed_size;
+
+ for (j = 0; j < op_args->out_numargs; j++)
+ total_expected_out_size += op_args->out_args[j].size;
+ }
+
+ buffer = kzalloc(buffer_size, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ /*
+ * Pass 2: run each op's converter, then serialise the op.
+ * NOTE(review): the buffer was sized before the converters ran; this
+ * assumes a converter never changes the in arg sizes - confirm.
+ */
+ buffer_pos = buffer;
+ for (i = 0; i < req_count; i++) {
+ if (compound->op_converters[i]) {
+ ret = compound->op_converters[i](compound, i);
+ if (ret < 0)
+ goto out_free_buffer;
+ }
+
+ buffer_pos = fuse_compound_build_one_op(fc,
+ compound->op_args[i],
+ buffer_pos, i);
+ }
+
+ /*
+ * NOTE(review): result_size is a uint32_t in struct fuse_compound_in
+ * while the sum is a size_t; a larger sum would be truncated.
+ */
+ compound->compound_header.result_size = total_expected_out_size;
+
+ args.in_args[0].size = sizeof(compound->compound_header);
+ args.in_args[0].value = &compound->compound_header;
+ args.in_args[1].size = buffer_size;
+ args.in_args[1].value = buffer;
+
+ /* buffer_size is reused: from here on it is the reply buffer size */
+ buffer_size = total_expected_out_size +
+ req_count * sizeof(struct fuse_out_header);
+
+ resp_payload_buffer = kzalloc(buffer_size, GFP_KERNEL);
+ if (!resp_payload_buffer) {
+ ret = -ENOMEM;
+ goto out_free_buffer;
+ }
+
+ args.out_args[0].size = sizeof(compound->result_header);
+ args.out_args[0].value = &compound->result_header;
+ args.out_args[1].size = buffer_size;
+ args.out_args[1].value = resp_payload_buffer;
+
+ /* Any negative result, whatever the error code, triggers the fallback */
+ ret = fuse_simple_request(compound->fm, &args);
+ if (ret < 0)
+ goto fallback_separate;
+
+ /*
+ * The decoder takes out_args[1].size as the number of bytes received;
+ * presumably out_argvar makes the lower layer update it - confirm.
+ */
+ ret = fuse_handle_compound_results(compound, &args);
+ if (ret == 0)
+ goto out;
+
+fallback_separate:
+ /* Kernel tries to fallback to separate requests */
+ if (!(compound->compound_header.flags & FUSE_COMPOUND_ATOMIC))
+ ret = fuse_compound_fallback_separate(compound);
+
+out:
+ kfree(resp_payload_buffer);
+out_free_buffer:
+ kfree(buffer);
+ return ret;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7f16049387d15e869db4be23a93605098588eda9..e46315aa428c9d0e704c62a0b80811172c5ec9c1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1273,6 +1273,45 @@ static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap,
int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags);
+/*
+ * Compound request builder, state tracker, and args pointer storage
+ */
+struct fuse_compound_req {
+ /* Mount the compound (and any fallback request) is sent on */
+ struct fuse_mount *fm;
+ /* Header sent ahead of the serialised operations */
+ struct fuse_compound_in compound_header;
+ /* Header received ahead of the serialised responses */
+ struct fuse_compound_out result_header;
+
+ /*
+ * Array of max_count pointers to the args of each added operation.
+ * Only the pointers are stored; the args are owned by the caller.
+ */
+ struct fuse_args **op_args;
+
+ /*
+ * Every op can add a converter function to construct the ops args from
+ * the already received responses.
+ */
+ int (**op_converters)(struct fuse_compound_req *compound,
+ unsigned int index);
+ /* Array of max_count per-operation errors; 0 where none was recorded */
+ int *op_errors;
+
+ /* Capacity of the three arrays above */
+ unsigned int max_count;
+ /* Number of operations added so far */
+ unsigned int count;
+};
+
+/*
+ * Compound request API
+ */
+ssize_t fuse_compound_send(struct fuse_compound_req *compound);
+
+struct fuse_compound_req *fuse_compound_alloc(struct fuse_mount *fm,
+ u32 max_count, u32 flags);
+int fuse_compound_add(struct fuse_compound_req *compound,
+ struct fuse_args *args,
+ int (*converter)(struct fuse_compound_req *compound,
+ unsigned int index));
+void fuse_compound_free(struct fuse_compound_req *compound);
+/*
+ * Return the error recorded for the operation at @op_idx (0 if none was
+ * recorded), or -EINVAL if @op_idx lies outside the op_errors array.
+ */
+static inline int fuse_compound_get_error(struct fuse_compound_req *compound, int op_idx)
+{
+	/* op_errors holds max_count entries; never index outside of it */
+	if (op_idx < 0 || (unsigned int)op_idx >= compound->max_count)
+		return -EINVAL;
+
+	return compound->op_errors[op_idx];
+}
+
/**
* Assign a unique id to a fuse request
*/
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index c13e1f9a2f12bd39f535188cb5466688eba42263..d43bffd1ccbe2b3d144864407d60ff7a48db53ed 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -664,6 +664,13 @@ enum fuse_opcode {
FUSE_STATX = 52,
FUSE_COPY_FILE_RANGE_64 = 53,
+ /* A compound request is handled like a single request,
+ * but contains multiple requests as input.
+ * This can be used to signal to the fuse server that
+ * the requests can be combined atomically.
+ */
+ FUSE_COMPOUND = 54,
+
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -1245,6 +1252,51 @@ struct fuse_supp_groups {
uint32_t groups[];
};
+/*
+ * This is a hint to the fuse server that all requests are complete and it can
+ * use automatic decoding and sequential processing from libfuse.
+ */
+#define FUSE_COMPOUND_SEPARABLE (1 << 0)
+/*
+ * This will be used by the kernel to continue on
+ * even after one of the requests fails.
+ */
+#define FUSE_COMPOUND_CONTINUE (1 << 1)
+/*
+ * This flags the compound as atomic, which
+ * means that the operation has to be interpreted
+ * atomically and be directly supported by the fuse server
+ * itself.
+ */
+#define FUSE_COMPOUND_ATOMIC (1 << 2)
+
+/*
+ * Compound request header
+ *
+ * This header is followed by the fuse requests
+ */
+struct fuse_compound_in {
+ uint32_t flags; /* Compound flags (FUSE_COMPOUND_* bits) */
+
+ /* Total size of all results expected from the fuse server.
+ * This is needed for preallocating the whole result for all
+ * commands in the fuse server.
+ * The kernel fills in the sum of the out argument sizes of all
+ * contained requests; the per-request fuse_out_header is not
+ * included in this value.
+ */
+ uint32_t result_size;
+ uint64_t reserved; /* Not set by the kernel code in this patch */
+};
+
+/*
+ * Compound response header
+ *
+ * This header is followed by complete fuse responses
+ */
+struct fuse_compound_out {
+ /* Result flags; not interpreted by the kernel code in this patch */
+ uint32_t flags; /* Result flags */
+ uint32_t padding; /* Keeps the following 64-bit field aligned */
+ uint64_t reserved; /* Not read by the kernel code in this patch */
+};
+
/**
* Size of the ring buffer header
*/
--
2.53.0
On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote:
> +int fuse_compound_add(struct fuse_compound_req *compound,
> + struct fuse_args *args,
> + int (*converter)(struct fuse_compound_req *compound,
> + unsigned int index))
> +{
> + if (!compound || compound->count >= compound->max_count)
> + return -EINVAL;
> +
> + if (args->in_pages)
> + return -EINVAL;
WARN_ON()
> +
> + compound->op_args[compound->count] = args;
This could be done *much* simpler with lists. Just add a 'struct
list_head list' member to struct fuse_args and pass a 'struct
list_head *compound' to fuse_compound_add(). No need for
fuse_compound_alloc/free().
Alternatively pass a 'void **' to fuse_compound_add(), where the input
args could be copied directly. This has the advantage of not having to
keep the args around, so they could be local to the fill function.
After the request is done the responses would similarly be decoded
into the outargs.
Both approaches have advantages and disadvantages, I don't see a clear winner.
> + compound->op_converters[compound->count] = converter;
What are these converters?
> +ssize_t fuse_compound_send(struct fuse_compound_req *compound)
> +{
> + struct fuse_conn *fc = compound->fm->fc;
> + struct fuse_args args = {
> + .opcode = FUSE_COMPOUND,
> + .in_numargs = 2,
> + .out_numargs = 2,
> + .out_argvar = true,
> + };
> + unsigned int req_count = compound->count;
> + size_t total_expected_out_size = 0;
> + size_t buffer_size = 0;
> + void *resp_payload_buffer;
> + char *buffer_pos;
> + void *buffer = NULL;
> + ssize_t ret;
> + unsigned int i, j;
> +
> + for (i = 0; i < req_count; i++) {
> + struct fuse_args *op_args = compound->op_args[i];
> + size_t needed_size = sizeof(struct fuse_in_header);
> +
> + for (j = 0; j < op_args->in_numargs; j++)
> + needed_size += op_args->in_args[j].size;
> +
> + buffer_size += needed_size;
> +
> + for (j = 0; j < op_args->out_numargs; j++)
> + total_expected_out_size += op_args->out_args[j].size;
> + }
> +
> + buffer = kzalloc(buffer_size, GFP_KERNEL);
> + if (!buffer)
> + return -ENOMEM;
> +
> + buffer_pos = buffer;
> + for (i = 0; i < req_count; i++) {
> + if (compound->op_converters[i]) {
> + ret = compound->op_converters[i](compound, i);
> + if (ret < 0)
> + goto out_free_buffer;
> + }
> +
> + buffer_pos = fuse_compound_build_one_op(fc,
> + compound->op_args[i],
> + buffer_pos, i);
> + }
> +
> + compound->compound_header.result_size = total_expected_out_size;
> +
> + args.in_args[0].size = sizeof(compound->compound_header);
> + args.in_args[0].value = &compound->compound_header;
> + args.in_args[1].size = buffer_size;
> + args.in_args[1].value = buffer;
> +
> + buffer_size = total_expected_out_size +
> + req_count * sizeof(struct fuse_out_header);
> +
> + resp_payload_buffer = kzalloc(buffer_size, GFP_KERNEL);
> + if (!resp_payload_buffer) {
> + ret = -ENOMEM;
> + goto out_free_buffer;
> + }
> +
> + args.out_args[0].size = sizeof(compound->result_header);
> + args.out_args[0].value = &compound->result_header;
> + args.out_args[1].size = buffer_size;
> + args.out_args[1].value = resp_payload_buffer;
> +
> + ret = fuse_simple_request(compound->fm, &args);
> + if (ret < 0)
> + goto fallback_separate;
> +
> + ret = fuse_handle_compound_results(compound, &args);
> + if (ret == 0)
> + goto out;
> +
> +fallback_separate:
> + /* Kernel tries to fallback to separate requests */
> + if (!(compound->compound_header.flags & FUSE_COMPOUND_ATOMIC))
> + ret = fuse_compound_fallback_separate(compound);
> +
> +out:
> + kfree(resp_payload_buffer);
> +out_free_buffer:
> + kfree(buffer);
> + return ret;
> +}
If we go with the list of fuse_args, then all the above logic could go
into the lower layer (dev.c) which already handles fuse_args ->
request -> fuse_args conversion. What's needed is mostly just a loop
that repeats this for all the sub requests.
> +struct fuse_compound_req {
> + struct fuse_mount *fm;
> + struct fuse_compound_in compound_header;
> + struct fuse_compound_out result_header;
> +
> + struct fuse_args **op_args;
> +
> + /*
> + * Every op can add a converter function to construct the ops args from
> + * the already received responses.
> + */
> + int (**op_converters)(struct fuse_compound_req *compound,
> + unsigned int index);
> + int *op_errors;
Can go into fuse_args.
> +
> + unsigned int max_count;
> + unsigned int count;
> +};
> +/*
> + * This is a hint to the fuse server that all requests are complete and it can
> + * use automatic decoding and sequential processing from libfuse.
> + */
> +#define FUSE_COMPOUND_SEPARABLE (1 << 0)
We really need per sub-request flags, not per-compound flags.
I.e:
FUSE_SUB_IS_ENTRY - this sub request will return a new entry on
success (nodeid, filehandle)
FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup
> +/*
> + * This will be used by the kernel to continue on
> + * even after one of the requests fail.
> + */
> +#define FUSE_COMPOUND_CONTINUE (1 << 1)
Again, I think it makes no sense to have compound-global flags, since
there may be several sub-requests with different dependencies
between them.
Also if there are no examples of a certain flag in this patchset, then
it's better to just leave it out and add it together with the actual
user.
> +/*
> + * This flags the compound as atomic, which
> + * means that the operation has to be interpreted
> + * atomically and be directly supported by the fuse server
> + * itself.
> + */
> +#define FUSE_COMPOUND_ATOMIC (1 << 2)
Why would this be needed? The VFS provides the locking that ensures
atomicity, even if the implementation is not atomic. At least for
local filesystems that is always the case.
> +
> +/*
> + * Compound request header
> + *
> + * This header is followed by the fuse requests
> + */
> +struct fuse_compound_in {
> + uint32_t flags; /* Compound flags */
Not needed.
> +
> + /* Total size of all results expected from the fuse server.
> + * This is needed for preallocating the whole result for all
> + * commands in the fuse server.
> + */
> + uint32_t result_size;
Please drop this. I think libfuse can allocate separate buffers for
each sub-request's out arg and hand a vector of these to the transport
layer.
> + uint64_t reserved;
So it turns out the compound header is empty. Not a problem, just
make it contain 'uint64_t reserved[2]' for future use.
> +};
> +
> +/*
> + * Compound response header
> + *
> + * This header is followed by complete fuse responses
> + */
> +struct fuse_compound_out {
> + uint32_t flags; /* Result flags */
What is this for?
> + uint32_t padding;
> + uint64_t reserved;
> +};
Thanks,
Miklos
On Fri, Feb 27, 2026 at 10:45:36AM +0100, Miklos Szeredi wrote: > On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote: > > > + > > +fallback_separate: > > + /* Kernel tries to fallback to separate requests */ > > + if (!(compound->compound_header.flags & FUSE_COMPOUND_ATOMIC)) > > + ret = fuse_compound_fallback_separate(compound); > > + > > +out: > > + kfree(resp_payload_buffer); > > +out_free_buffer: > > + kfree(buffer); > > + return ret; > > +} > > If we go with the list of fuse_args, then all the above logic could go > into the lower layer (dev.c) which already handles fuse_args -> > request -> fuse_args conversion. What's needed is mostly just a loop > that repeats this for all the sub requests. > > I have actually implemented this idea and avoided any memory allocation. So the short version is, it can be done. But to me this looks kinda ugly and a bit wrong. I have to check in the lower layer for an opcode from the upper layer and 'stream' the args. (in fuse_dev_do_read() or somewhere in that region there has to be a check for FUSE_COMPOUND and then call into different code) When handled on that level it has to be handled for io-uring slightly differently as well. I will test this a bit more and provide a new version unless someone tells me that this is not the right direction. > > Thanks, > Miklos Thanks, Horst
On Fri, Feb 27, 2026 at 10:45:36AM +0100, Miklos Szeredi wrote: > On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote: > > + > > + unsigned int max_count; > > + unsigned int count; > > +}; > > +/* > > + * This is a hint to the fuse server that all requests are complete and it can > > + * use automatic decoding and sequential processing from libfuse. > > + */ > > +#define FUSE_COMPOUND_SEPARABLE (1 << 0) > > We really need per sub-request flags, not per-compound flags. > > I.e: > > FUSE_SUB_IS_ENTRY - this sub request will return a new entry on > success (nodeid, filehandle) > FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup > Couldn't we just save boolean flags in the fuse_args? Something like 'bool is_sub_entry:1' and so on? If we have the automatic separation and call of requests in the kernel when the fuse server returns ENOSYS, I don't see the point in adding this to libfuse as well, since there will never be the case, that kernel doesn't support compounds but libfuse does. It's either the fuse server handles the whole compound, or the kernel does. My point is, we don't need to send that information anywhere. > Thanks, > Miklos Thanks for taking the time, Horst
On Mon, 2 Mar 2026 at 10:56, Horst Birthelmer <horst@birthelmer.de> wrote: > > On Fri, Feb 27, 2026 at 10:45:36AM +0100, Miklos Szeredi wrote: > > On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote: > > > + > > > + unsigned int max_count; > > > + unsigned int count; > > > +}; > > > +/* > > > + * This is a hint to the fuse server that all requests are complete and it can > > > + * use automatic decoding and sequential processing from libfuse. > > > + */ > > > +#define FUSE_COMPOUND_SEPARABLE (1 << 0) > > > > We really need per sub-request flags, not per-compound flags. > > > > I.e: > > > > FUSE_SUB_IS_ENTRY - this sub request will return a new entry on > > success (nodeid, filehandle) > > FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup > > > > Couldn't we just save boolean flags in the fuse_args? > Something like 'bool is_sub_entry:1' and so on? Sure, that's fine. > If we have the automatic separation and call of requests in the kernel > when the fuse server returns ENOSYS, I don't see the point in adding this > to libfuse as well, since there will never be the case, that kernel > doesn't support compounds but libfuse does. > It's either the fuse server handles the whole compound, or the kernel does. No, I think the library is in a good position to handle compounds, because that can reduce the complexity in the server while keeping most of the performance benefits. > My point is, we don't need to send that information anywhere. We need to send that information in any case. It needs to be part of the matching done by the server to "recognize" a certain compound, because the same sequence of operations could have different meaning if the dependencies are different. Thanks, Miklos
On Mon, Mar 02, 2026 at 12:03:35PM +0100, Miklos Szeredi wrote: > On Mon, 2 Mar 2026 at 10:56, Horst Birthelmer <horst@birthelmer.de> wrote: > > > > On Fri, Feb 27, 2026 at 10:45:36AM +0100, Miklos Szeredi wrote: > > > On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote: > > > > + > > > > + unsigned int max_count; > > > > + unsigned int count; > > > > +}; > > > > +/* > > > > + * This is a hint to the fuse server that all requests are complete and it can > > > > + * use automatic decoding and sequential processing from libfuse. > > > > + */ > > > > +#define FUSE_COMPOUND_SEPARABLE (1 << 0) > > > > > > We really need per sub-request flags, not per-compound flags. > > > > > > I.e: > > > > > > FUSE_SUB_IS_ENTRY - this sub request will return a new entry on > > > success (nodeid, filehandle) > > > FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup > > > > > > > Couldn't we just save boolean flags in the fuse_args? > > Something like 'bool is_sub_entry:1' and so on? > > Sure, that's fine. > > > If we have the automatic separation and call of requests in the kernel > > when the fuse server returns ENOSYS, I don't see the point in adding this > > to libfuse as well, since there will never be the case, that kernel > > doesn't support compounds but libfuse does. > > It's either the fuse server handles the whole compound, or the kernel does. > > No, I think the library is in a good position to handle compounds, > because that can reduce the complexity in the server while keeping > most of the performance benefits. > > > My point is, we don't need to send that information anywhere. > > We need to send that information in any case. It needs to be part of > the matching done by the server to "recognize" a certain compound, > because the same sequence of operations could have different meaning > if the dependencies are different. 
OK, if I have to send flags, that are only present if the fuse request is inside a compound then I would suggest that we preface the fuse request with a small compound header, where we store that information. I would not want to change the fuse request, especially not define the same flags for every type of fuse requests. Would that be acceptable? > > Thanks, > Miklos > Thanks, Horst
On Mon, 2 Mar 2026 at 14:19, Horst Birthelmer <horst@birthelmer.de> wrote: > OK, if I have to send flags, that are only present if the fuse request > is inside a compound then I would suggest that we preface the fuse request > with a small compound header, where we store that information. > > I would not want to change the fuse request, especially not define the same > flags for every type of fuse requests. > > Would that be acceptable? Yes, a separate header is the cleanest approach, Thanks, Miklos
On Fri, Feb 27, 2026 at 10:45:36AM +0100, Miklos Szeredi wrote:
> On Thu, 26 Feb 2026 at 17:43, Horst Birthelmer <horst@birthelmer.com> wrote:
>
> > +int fuse_compound_add(struct fuse_compound_req *compound,
> > + struct fuse_args *args,
> > + int (*converter)(struct fuse_compound_req *compound,
> > + unsigned int index))
> > +{
> > + if (!compound || compound->count >= compound->max_count)
> > + return -EINVAL;
> > +
> > + if (args->in_pages)
> > + return -EINVAL;
>
> WARN_ON()
>
> > +
> > + compound->op_args[compound->count] = args;
>
> This could be done *much* simpler with lists. Just add a 'struct
> list_head list' member to struct fuse_args and pass a 'struct
> list_head *compound' to fuse_compound_add(). No need for
> fuse_compound_alloc/free().
>
> Alternatively pass a 'void **' to fuse_compound_add(), where the input
> args could be copied directly. This has the advantage of not having to
> keep the args around, so they could be local to the fill function.
> After the request is done the responses would similarly be decoded
> into the outargs.
>
> Both approaches have advantages and disadvantages, I don't see a clear winner.
Will have another go at this.
> > + compound->op_converters[compound->count] = converter;
>
> What are these converters?
This was my way of dealing with the interdependencies.
The automatic sequentialization will call this for every request,
so we can copy and manipulate the args for the next request.
No need for any other flags then. We can provide one or more
of these callback functions and be done.
>
> > +ssize_t fuse_compound_send(struct fuse_compound_req *compound)
> > +{
> > + struct fuse_conn *fc = compound->fm->fc;
> > + struct fuse_args args = {
> > + .opcode = FUSE_COMPOUND,
> > + .in_numargs = 2,
> > + .out_numargs = 2,
> > + .out_argvar = true,
> > + };
> > + unsigned int req_count = compound->count;
> > + size_t total_expected_out_size = 0;
> > + size_t buffer_size = 0;
> > + void *resp_payload_buffer;
> > + char *buffer_pos;
> > + void *buffer = NULL;
> > + ssize_t ret;
> > + unsigned int i, j;
> > +
> > + for (i = 0; i < req_count; i++) {
> > + struct fuse_args *op_args = compound->op_args[i];
> > + size_t needed_size = sizeof(struct fuse_in_header);
> > +
> > + for (j = 0; j < op_args->in_numargs; j++)
> > + needed_size += op_args->in_args[j].size;
> > +
> > + buffer_size += needed_size;
> > +
> > + for (j = 0; j < op_args->out_numargs; j++)
> > + total_expected_out_size += op_args->out_args[j].size;
> > + }
> > +
> > + buffer = kzalloc(buffer_size, GFP_KERNEL);
> > + if (!buffer)
> > + return -ENOMEM;
> > +
> > + buffer_pos = buffer;
> > + for (i = 0; i < req_count; i++) {
> > + if (compound->op_converters[i]) {
> > + ret = compound->op_converters[i](compound, i);
> > + if (ret < 0)
> > + goto out_free_buffer;
> > + }
> > +
> > + buffer_pos = fuse_compound_build_one_op(fc,
> > + compound->op_args[i],
> > + buffer_pos, i);
> > + }
> > +
> > + compound->compound_header.result_size = total_expected_out_size;
> > +
> > + args.in_args[0].size = sizeof(compound->compound_header);
> > + args.in_args[0].value = &compound->compound_header;
> > + args.in_args[1].size = buffer_size;
> > + args.in_args[1].value = buffer;
> > +
> > + buffer_size = total_expected_out_size +
> > + req_count * sizeof(struct fuse_out_header);
> > +
> > + resp_payload_buffer = kzalloc(buffer_size, GFP_KERNEL);
> > + if (!resp_payload_buffer) {
> > + ret = -ENOMEM;
> > + goto out_free_buffer;
> > + }
> > +
> > + args.out_args[0].size = sizeof(compound->result_header);
> > + args.out_args[0].value = &compound->result_header;
> > + args.out_args[1].size = buffer_size;
> > + args.out_args[1].value = resp_payload_buffer;
> > +
> > + ret = fuse_simple_request(compound->fm, &args);
> > + if (ret < 0)
> > + goto fallback_separate;
> > +
> > + ret = fuse_handle_compound_results(compound, &args);
> > + if (ret == 0)
> > + goto out;
> > +
> > +fallback_separate:
> > + /* Kernel tries to fallback to separate requests */
> > + if (!(compound->compound_header.flags & FUSE_COMPOUND_ATOMIC))
> > + ret = fuse_compound_fallback_separate(compound);
> > +
> > +out:
> > + kfree(resp_payload_buffer);
> > +out_free_buffer:
> > + kfree(buffer);
> > + return ret;
> > +}
>
> If we go with the list of fuse_args, then all the above logic could go
> into the lower layer (dev.c) which already handles fuse_args ->
> request -> fuse_args conversion. What's needed is mostly just a loop
> that repeats this for all the sub requests.
>
>
> > +struct fuse_compound_req {
> > + struct fuse_mount *fm;
> > + struct fuse_compound_in compound_header;
> > + struct fuse_compound_out result_header;
> > +
> > + struct fuse_args **op_args;
> > +
> > + /*
> > + * Every op can add a converter function to construct the ops args from
> > + * the already received responses.
> > + */
> > + int (**op_converters)(struct fuse_compound_req *compound,
> > + unsigned int index);
> > + int *op_errors;
>
> Can go into fuse_args.
>
> > +
> > + unsigned int max_count;
> > + unsigned int count;
> > +};
> > +/*
> > + * This is a hint to the fuse server that all requests are complete and it can
> > + * use automatic decoding and sequential processing from libfuse.
> > + */
> > +#define FUSE_COMPOUND_SEPARABLE (1 << 0)
>
> We really need per sub-request flags, not per-compound flags.
>
> I.e:
>
> FUSE_SUB_IS_ENTRY - this sub request will return a new entry on
> success (nodeid, filehandle)
> FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup
>
we don't need this if we use my converters from above.
> > +/*
> > + * This will be used by the kernel to continue on
> > + * even after one of the requests fail.
> > + */
> > +#define FUSE_COMPOUND_CONTINUE (1 << 1)
>
> Again, I think it makes no sense to have compound-global flags, since
> it might be possible that there are several sub-requests and there are
> different dependencies between for each of them.
>
> Also if there are no examples of a certain flag in this patchset, then
> it's better to just leave it out and add it together with the actual
> user.
Actually, there is an example of it in compound.c.
>
> > +/*
> > + * This flags the compound as atomic, which
> > + * means that the operation has to be interpreted
> > + * atomically and be directly supported by the fuse server
> > + * itself.
> > + */
> > +#define FUSE_COMPOUND_ATOMIC (1 << 2)
>
> Why would this be needed? The VFS provides the locking that ensures
> atomicity, even if the implementation is not atomic. At least for
> local filesystems that is always the case.
>
we (by we I mean the fuse server I work on) could use the information that
a certain combination of requests should be atomic.
> > +
> > +/*
> > + * Compound request header
> > + *
> > + * This header is followed by the fuse requests
> > + */
> > +struct fuse_compound_in {
> > + uint32_t flags; /* Compound flags */
>
> Not needed.
>
> > +
> > + /* Total size of all results expected from the fuse server.
> > + * This is needed for preallocating the whole result for all
> > + * commands in the fuse server.
> > + */
> > + uint32_t result_size;
>
> Please drop this. I think libfuse can allocate separate buffers for
> each sub-request's out arg and hand a vector of these to the transport
> layer.
>
> > + uint64_t reserved;
>
> So it turns out the compound header is empty. Not a problem, just
> make it contain 'uint64_t reserved[2]' for future use.
>
OK, will do.
> > +};
> > +
> > +/*
> > + * Compound response header
> > + *
> > + * This header is followed by complete fuse responses
> > + */
> > +struct fuse_compound_out {
> > + uint32_t flags; /* Result flags */
>
> What is this for?
>
This was used for signalling stuff from the fuse server, like e.g.
did we actually create something etc.
On second glance ... in the spirit of your minimalization, probably
not needed any more.
> > + uint32_t padding;
> > + uint64_t reserved;
> > +};
>
> Thanks,
> Miklos
Overall I like your idea to make compounds really minimal.
There is only the part with the interdependencies that I struggle with, since
almost all examples I tried did not have a very simple methodology.
(LOOKUP+MKNOD+OPEN or Luis CREATE_HANDLE+OPEN)
Could you maybe provide some examples of usecases, that I should try to drill the
new logic?
It feels like you have other compounds in mind than I do.
I have used compounds to send groups of semantically linked requests to the fuse server,
signalling to it whether the kernel expects it to be one atomic operation or a preferred
'group' of requests (like open+getattr, nothing happens if those are not processed atomically
in a distributed file system).
Thanks for taking the time!
Horst
On Fri, 27 Feb 2026 at 11:48, Horst Birthelmer <horst@birthelmer.de> wrote: > > FUSE_SUB_IS_ENTRY - this sub request will return a new entry on > > success (nodeid, filehandle) > > FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup > > > > we don't need this if we use my converters from above. Dependencies need to be handled by the kernel and libfuse as well. Makes no sense to have two separate mechanisms for handling dependencies, so the kernel should use the same flags. > Could you maybe provide some examples of usecases, that I should try to drill the > new logic? - LOOKUP + GETATTR[L] - MKOBJ + (SETXATTR[L] (only for posix_acl inheritance)) + GETATTR[L] + (OPEN[L] (optional) - SETATTR + SETXATTR (setting posix_acl that modifies mode or setting mode on file with posix_acl) - INIT + LOOKUP_ROOT + GETATTR[L] - OPEN + IOCTL[O] + RELEASE[O] (fileattr_get/set) Only two dependencies here: lookup or open. Both are simple in terms of just needing to copy a field from a previous request to the current one with fixed positions in all of the above cases. The LOOKUP + MKNOD one *is* more complicated, because it makes execution of the MKNOD dependent on the result of the LOOKUP, so the dependency handler needs to look inside the result and decide how to proceed based on that. Some pros and cons of both approaches, so I'm curious to see how yours looks like. > I have used compounds to send groups of semantically linked requests to the fuse server > signalling to it if the kernel expects it to be one atomic operation or a preferred > 'group' of requests (like open+getattr, nothing happens if those are not processed atomic > in a distributed file system) Which is the case where the kernel expects them to be atomic? Thanks, Miklos
On Fri, Feb 27, 2026 at 12:29:00PM +0100, Miklos Szeredi wrote: > On Fri, 27 Feb 2026 at 11:48, Horst Birthelmer <horst@birthelmer.de> wrote: > > > > FUSE_SUB_IS_ENTRY - this sub request will return a new entry on > > > success (nodeid, filehandle) > > > FUSE_SUB_DEP_ENTRY - this sub request depends on the result of a previous lookup > > > > > > > we don't need this if we use my converters from above. > > Dependencies need to be handled by the kernel and libfuse as well. > Makes no sense to have two separate mechanisms for handling > dependencies, so the kernel should use the same flags. > OK, got it. > > Could you maybe provide some examples of usecases, that I should try to drill the > > new logic? > > - LOOKUP + GETATTR[L] > - MKOBJ + (SETXATTR[L] (only for posix_acl inheritance)) + GETATTR[L] > + (OPEN[L] (optional) > - SETATTR + SETXATTR (setting posix_acl that modifies mode or setting > mode on file with posix_acl) > - INIT + LOOKUP_ROOT + GETATTR[L] > - OPEN + IOCTL[O] + RELEASE[O] (fileattr_get/set) > > Only two dependencies here: lookup or open. Both are simple in terms > of just needing to copy a field from a previous request to the current > one with fixed positions in all of the above cases. > > The LOOKUP + MKNOD one *is* more complicated, because it makes > execution of the MKNOD dependent on the result of the LOOKUP, so the > dependency handler needs to look inside the result and decide how to > proceed based on that. Some pros and cons of both approaches, so I'm > curious to see how yours looks like. > I really am grateful for this list. Helps me a lot, since I was looking at this from the perspective of the fuse server, which turns out to be different.
> > I have used compounds to send groups of semantically linked requests to the fuse server > > signalling to it if the kernel expects it to be one atomic operation or a preferred > > 'group' of requests (like open+getattr, nothing happens if those are not processed atomic > > in a distributed file system) > > Which is the case where the kernel expects them to be atomic? > I naively thought that fuse_atomic_open() was actually there to do an atomic open ... ;-) > Thanks, > Miklos > Thanks, Horst
On Fri, 27 Feb 2026 at 12:37, Horst Birthelmer <horst@birthelmer.de> wrote: > I naively thought that fuse_atomic_open() was actually there to do an atomic open ... ;-) Yes, it helps with atomicity relative to operations on other clients in a distributed fs. For a local fs it does not make a difference in terms of correctness, but with compounds it could improve performance compared to separate lookup + mknod + open. Thanks, Miklos
On Thu, Feb 26, 2026 at 8:43 AM Horst Birthelmer <horst@birthelmer.com> wrote:
>
> From: Horst Birthelmer <hbirthelmer@ddn.com>
>
> +ssize_t fuse_compound_send(struct fuse_compound_req *compound)
> +{
> + struct fuse_conn *fc = compound->fm->fc;
> + struct fuse_args args = {
> + .opcode = FUSE_COMPOUND,
> + .in_numargs = 2,
> + .out_numargs = 2,
> + .out_argvar = true,
> + };
> + unsigned int req_count = compound->count;
> + size_t total_expected_out_size = 0;
> + size_t buffer_size = 0;
> + void *resp_payload_buffer;
> + char *buffer_pos;
> + void *buffer = NULL;
> + ssize_t ret;
> + unsigned int i, j;
> +
> + for (i = 0; i < req_count; i++) {
> + struct fuse_args *op_args = compound->op_args[i];
> + size_t needed_size = sizeof(struct fuse_in_header);
> +
> + for (j = 0; j < op_args->in_numargs; j++)
> + needed_size += op_args->in_args[j].size;
> +
> + buffer_size += needed_size;
> +
> + for (j = 0; j < op_args->out_numargs; j++)
> + total_expected_out_size += op_args->out_args[j].size;
> + }
> +
> + buffer = kzalloc(buffer_size, GFP_KERNEL);
> + if (!buffer)
> + return -ENOMEM;
> +
> + buffer_pos = buffer;
> + for (i = 0; i < req_count; i++) {
> + if (compound->op_converters[i]) {
> + ret = compound->op_converters[i](compound, i);
Can you explain why this is needed? The caller has all the information
up front, so why can't it just set this information before calling
fuse_compound_send() instead of needing this to be done in the
->op_converters callback?
> + if (ret < 0)
> + goto out_free_buffer;
> + }
> +
> + buffer_pos = fuse_compound_build_one_op(fc,
> + compound->op_args[i],
> + buffer_pos, i);
> + }
> +
> + compound->compound_header.result_size = total_expected_out_size;
> +
> + args.in_args[0].size = sizeof(compound->compound_header);
> + args.in_args[0].value = &compound->compound_header;
> + args.in_args[1].size = buffer_size;
> + args.in_args[1].value = buffer;
> +
> + buffer_size = total_expected_out_size +
> + req_count * sizeof(struct fuse_out_header);
> +
> + resp_payload_buffer = kzalloc(buffer_size, GFP_KERNEL);
> + if (!resp_payload_buffer) {
> + ret = -ENOMEM;
> + goto out_free_buffer;
> + }
> +
> + args.out_args[0].size = sizeof(compound->result_header);
> + args.out_args[0].value = &compound->result_header;
> + args.out_args[1].size = buffer_size;
> + args.out_args[1].value = resp_payload_buffer;
> +
> + ret = fuse_simple_request(compound->fm, &args);
> + if (ret < 0)
> + goto fallback_separate;
> +
> + ret = fuse_handle_compound_results(compound, &args);
> + if (ret == 0)
> + goto out;
> +
> +fallback_separate:
> + /* Kernel tries to fallback to separate requests */
> + if (!(compound->compound_header.flags & FUSE_COMPOUND_ATOMIC))
> + ret = fuse_compound_fallback_separate(compound);
imo it's libfuse's responsibility to handle everything correctly and
if the compound request cannot be handled by libfuse for whatever
reason, the kernel should just fail it instead of retrying each
request separately. I don't see it being likely that if the compound
request fails, then sending each request separately helps. This would
also let us get rid of the FUSE_COMPOUND_CONTINUE flag which imo is a
bit confusing.
Thanks,
Joanne
> +
> +out:
> + kfree(resp_payload_buffer);
> +out_free_buffer:
> + kfree(buffer);
> + return ret;
> +}
© 2016 - 2026 Red Hat, Inc.