[PATCH V3 14/21] famfs_fuse: Plumb the GET_FMAP message/response

John Groves posted 21 patches 1 month ago
[PATCH V3 14/21] famfs_fuse: Plumb the GET_FMAP message/response
Posted by John Groves 1 month ago
Upon completion of an OPEN, if we're in famfs-mode we do a GET_FMAP to
retrieve and cache up the file-to-dax map in the kernel. If this
succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.

Signed-off-by: John Groves <john@groves.net>
---
 MAINTAINERS               |  8 +++++
 fs/fuse/Makefile          |  1 +
 fs/fuse/famfs.c           | 74 +++++++++++++++++++++++++++++++++++++++
 fs/fuse/file.c            | 14 +++++++-
 fs/fuse/fuse_i.h          | 47 ++++++++++++++++++++++++-
 fs/fuse/inode.c           |  8 ++++-
 fs/fuse/iomode.c          |  2 +-
 include/uapi/linux/fuse.h |  7 ++++
 8 files changed, 157 insertions(+), 4 deletions(-)
 create mode 100644 fs/fuse/famfs.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 90429cb06090..526309943026 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10374,6 +10374,14 @@ F:	fs/fuse/
 F:	include/uapi/linux/fuse.h
 F:	tools/testing/selftests/filesystems/fuse/
 
+FUSE [FAMFS Fabric-Attached Memory File System]
+M:	John Groves <jgroves@micron.com>
+M:	John Groves <John@Groves.net>
+L:	linux-cxl@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+S:	Supported
+F:	fs/fuse/famfs.c
+
 FUTEX SUBSYSTEM
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Ingo Molnar <mingo@redhat.com>
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 22ad9538dfc4..3f8dcc8cbbd0 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -17,5 +17,6 @@ fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o
 fuse-$(CONFIG_SYSCTL) += sysctl.o
 fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
+fuse-$(CONFIG_FUSE_FAMFS_DAX) += famfs.o
 
 virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
new file mode 100644
index 000000000000..0f7e3f00e1e7
--- /dev/null
+++ b/fs/fuse/famfs.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * famfs - dax file system for shared fabric-attached memory
+ *
+ * Copyright 2023-2025 Micron Technology, Inc.
+ *
+ * This file system, originally based on ramfs the dax support from xfs,
+ * is intended to allow multiple host systems to mount a common file system
+ * view of dax files that map to shared memory.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/dax.h>
+#include <linux/iomap.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/string.h>
+
+#include "fuse_i.h"
+
+
+#define FMAP_BUFSIZE PAGE_SIZE
+
+int
+fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	size_t fmap_bufsize = FMAP_BUFSIZE;
+	u64 nodeid = get_node_id(inode);
+	ssize_t fmap_size;
+	void *fmap_buf;
+	int rc;
+
+	FUSE_ARGS(args);
+
+	/* Don't retrieve if we already have the famfs metadata */
+	if (fi->famfs_meta)
+		return 0;
+
+	fmap_buf = kcalloc(1, FMAP_BUFSIZE, GFP_KERNEL);
+	if (!fmap_buf)
+		return -EIO;
+
+	args.opcode = FUSE_GET_FMAP;
+	args.nodeid = nodeid;
+
+	/* Variable-sized output buffer
+	 * this causes fuse_simple_request() to return the size of the
+	 * output payload
+	 */
+	args.out_argvar = true;
+	args.out_numargs = 1;
+	args.out_args[0].size = fmap_bufsize;
+	args.out_args[0].value = fmap_buf;
+
+	/* Send GET_FMAP command */
+	rc = fuse_simple_request(fm, &args);
+	if (rc < 0) {
+		pr_err("%s: err=%d from fuse_simple_request()\n",
+		       __func__, rc);
+		return rc;
+	}
+	fmap_size = rc;
+
+	/* We retrieved the "fmap" (the file's map to memory), but
+	 * we haven't used it yet. A call to famfs_file_init_dax() will be added
+	 * here in a subsequent patch, when we add the ability to attach
+	 * fmaps to files.
+	 */
+
+	kfree(fmap_buf);
+	return 0;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 093569033ed1..1f64bf68b5ee 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -277,6 +277,16 @@ static int fuse_open(struct inode *inode, struct file *file)
 	err = fuse_do_open(fm, get_node_id(inode), file, false);
 	if (!err) {
 		ff = file->private_data;
+
+		if ((fm->fc->famfs_iomap) && (S_ISREG(inode->i_mode))) {
+			/* Get the famfs fmap - failure is fatal */
+			err = fuse_get_fmap(fm, inode);
+			if (err) {
+				fuse_sync_release(fi, ff, file->f_flags);
+				goto out_nowrite;
+			}
+		}
+
 		err = fuse_finish_open(inode, file);
 		if (err)
 			fuse_sync_release(fi, ff, file->f_flags);
@@ -284,12 +294,14 @@ static int fuse_open(struct inode *inode, struct file *file)
 			fuse_truncate_update_attr(inode, file);
 	}
 
+out_nowrite:
 	if (is_wb_truncate || dax_truncate)
 		fuse_release_nowrite(inode);
 	if (!err) {
 		if (is_truncate)
 			truncate_pagecache(inode, 0);
-		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+		else if (!(ff->open_flags & FOPEN_KEEP_CACHE) &&
+			 !fuse_file_famfs(fi))
 			invalidate_inode_pages2(inode->i_mapping);
 	}
 	if (dax_truncate)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 84d0ee2a501d..691c7850cf4e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -223,6 +223,14 @@ struct fuse_inode {
 	 * so preserve the blocksize specified by the server.
 	 */
 	u8 cached_i_blkbits;
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	/* Pointer to the file's famfs metadata. Primary content is the
+	 * in-memory version of the fmap - the map from file's offset range
+	 * to DAX memory
+	 */
+	void *famfs_meta;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -1525,11 +1533,14 @@ void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */
 
+static inline int fuse_file_famfs(struct fuse_inode *fi); /* forward */
+
 /* This macro is used by virtio_fs, but now it also needs to filter for
  * "not famfs"
  */
 #define FUSE_IS_VIRTIO_DAX(fuse_inode) (IS_ENABLED(CONFIG_FUSE_DAX)	\
-					&& IS_DAX(&fuse_inode->inode))
+					&& IS_DAX(&fuse_inode->inode)	\
+					&& !fuse_file_famfs(fuse_inode))
 
 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
 ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
@@ -1654,4 +1665,38 @@ static inline void famfs_teardown(struct fuse_conn *fc)
 #endif
 }
 
+static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
+						       void *meta)
+{
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return xchg(&fi->famfs_meta, meta);
+#else
+	return NULL;
+#endif
+}
+
+static inline void famfs_meta_free(struct fuse_inode *fi)
+{
+	/* Stub wil be connected in a subsequent commit */
+}
+
+static inline int fuse_file_famfs(struct fuse_inode *fi)
+{
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return (READ_ONCE(fi->famfs_meta) != NULL);
+#else
+	return 0;
+#endif
+}
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode);
+#else
+static inline int
+fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	return 0;
+}
+#endif
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2e0844aabbae..9e121a1d63b7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -120,6 +120,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_inode_backing_set(fi, NULL);
 
+	if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
+		famfs_meta_set(fi, NULL);
+
 	return &fi->inode;
 
 out_free_forget:
@@ -141,6 +144,9 @@ static void fuse_free_inode(struct inode *inode)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		fuse_backing_put(fuse_inode_backing(fi));
 
+	if (S_ISREG(inode->i_mode) && fuse_file_famfs(fi))
+		famfs_meta_free(fi);
+
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -162,7 +168,7 @@ static void fuse_evict_inode(struct inode *inode)
 	/* Will write inode on close/munmap and in all other dirtiers */
 	WARN_ON(inode_state_read_once(inode) & I_DIRTY_INODE);
 
-	if (FUSE_IS_VIRTIO_DAX(fi))
+	if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi))
 		dax_break_layout_final(inode);
 
 	truncate_inode_pages_final(&inode->i_data);
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index 31ee7f3304c6..948148316ef0 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -203,7 +203,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode)
 	 * io modes are not relevant with DAX and with server that does not
 	 * implement open.
 	 */
-	if (FUSE_IS_VIRTIO_DAX(fi) || !ff->args)
+	if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi) || !ff->args)
 		return 0;
 
 	/*
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5e2c93433823..bfb92a4aa8a9 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -669,6 +669,9 @@ enum fuse_opcode {
 	FUSE_STATX		= 52,
 	FUSE_COPY_FILE_RANGE_64	= 53,
 
+	/* Famfs / devdax opcodes */
+	FUSE_GET_FMAP           = 54,
+
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
 
@@ -1313,4 +1316,8 @@ struct fuse_uring_cmd_req {
 	uint8_t padding[6];
 };
 
+/* Famfs fmap message components */
+
+#define FAMFS_FMAP_MAX 32768 /* Largest supported fmap message */
+
 #endif /* _LINUX_FUSE_H */
-- 
2.49.0
Re: [PATCH V3 14/21] famfs_fuse: Plumb the GET_FMAP message/response
Posted by Jonathan Cameron 1 month ago
On Wed,  7 Jan 2026 09:33:23 -0600
John Groves <John@Groves.net> wrote:

> Upon completion of an OPEN, if we're in famfs-mode we do a GET_FMAP to
> retrieve and cache up the file-to-dax map in the kernel. If this
> succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.
> 
> Signed-off-by: John Groves <john@groves.net>
A few things inline.

J

> diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
> new file mode 100644
> index 000000000000..0f7e3f00e1e7
> --- /dev/null
> +++ b/fs/fuse/famfs.c
> @@ -0,0 +1,74 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * famfs - dax file system for shared fabric-attached memory
> + *
> + * Copyright 2023-2025 Micron Technology, Inc.
> + *
> + * This file system, originally based on ramfs the dax support from xfs,
> + * is intended to allow multiple host systems to mount a common file system
> + * view of dax files that map to shared memory.
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/mm.h>
> +#include <linux/dax.h>
> +#include <linux/iomap.h>
> +#include <linux/path.h>
> +#include <linux/namei.h>
> +#include <linux/string.h>
> +
> +#include "fuse_i.h"
> +
> +
> +#define FMAP_BUFSIZE PAGE_SIZE
> +
> +int
> +fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	size_t fmap_bufsize = FMAP_BUFSIZE;
> +	u64 nodeid = get_node_id(inode);
> +	ssize_t fmap_size;
> +	void *fmap_buf;
> +	int rc;
> +
> +	FUSE_ARGS(args);
> +
> +	/* Don't retrieve if we already have the famfs metadata */
> +	if (fi->famfs_meta)
> +		return 0;
> +
> +	fmap_buf = kcalloc(1, FMAP_BUFSIZE, GFP_KERNEL);

If there is only ever 1, does kcalloc() make sense over kzalloc()?

> +	if (!fmap_buf)
> +		return -EIO;
> +
> +	args.opcode = FUSE_GET_FMAP;
> +	args.nodeid = nodeid;
> +
> +	/* Variable-sized output buffer
> +	 * this causes fuse_simple_request() to return the size of the
> +	 * output payload
> +	 */
> +	args.out_argvar = true;
> +	args.out_numargs = 1;
> +	args.out_args[0].size = fmap_bufsize;
> +	args.out_args[0].value = fmap_buf;
> +
> +	/* Send GET_FMAP command */
> +	rc = fuse_simple_request(fm, &args);
> +	if (rc < 0) {
> +		pr_err("%s: err=%d from fuse_simple_request()\n",
> +		       __func__, rc);

Leaks the fmap_buf?  Maybe use a __free() so no need to keep track of htat.


> +		return rc;
> +	}
> +	fmap_size = rc;
> +
> +	/* We retrieved the "fmap" (the file's map to memory), but
> +	 * we haven't used it yet. A call to famfs_file_init_dax() will be added
> +	 * here in a subsequent patch, when we add the ability to attach
> +	 * fmaps to files.
> +	 */
> +
> +	kfree(fmap_buf);
> +	return 0;
> +}

> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 84d0ee2a501d..691c7850cf4e 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -223,6 +223,14 @@ struct fuse_inode {

>  
> +static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
> +						       void *meta)
> +{
> +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> +	return xchg(&fi->famfs_meta, meta);
> +#else
> +	return NULL;
> +#endif
> +}
> +
> +static inline void famfs_meta_free(struct fuse_inode *fi)
> +{
> +	/* Stub wil be connected in a subsequent commit */
> +}
> +
> +static inline int fuse_file_famfs(struct fuse_inode *fi)
> +{
> +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> +	return (READ_ONCE(fi->famfs_meta) != NULL);
> +#else
> +	return 0;
> +#endif
> +}
> +
> +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> +int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode);
> +#else
> +static inline int
> +fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
> +{
> +	return 0;
> +}
> +#endif
I'd do a single block under one if IS_ENABLED() and then use an else
for the stubs.   Should end up more readable.

Jonathan
Re: [PATCH V3 14/21] famfs_fuse: Plumb the GET_FMAP message/response
Posted by John Groves 1 month ago
On 26/01/08 12:49PM, Jonathan Cameron wrote:
> On Wed,  7 Jan 2026 09:33:23 -0600
> John Groves <John@Groves.net> wrote:
> 
> > Upon completion of an OPEN, if we're in famfs-mode we do a GET_FMAP to
> > retrieve and cache up the file-to-dax map in the kernel. If this
> > succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.
> > 
> > Signed-off-by: John Groves <john@groves.net>
> A few things inline.
> 
> J
> 
> > diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
> > new file mode 100644
> > index 000000000000..0f7e3f00e1e7
> > --- /dev/null
> > +++ b/fs/fuse/famfs.c
> > @@ -0,0 +1,74 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * famfs - dax file system for shared fabric-attached memory
> > + *
> > + * Copyright 2023-2025 Micron Technology, Inc.
> > + *
> > + * This file system, originally based on ramfs the dax support from xfs,
> > + * is intended to allow multiple host systems to mount a common file system
> > + * view of dax files that map to shared memory.
> > + */
> > +
> > +#include <linux/fs.h>
> > +#include <linux/mm.h>
> > +#include <linux/dax.h>
> > +#include <linux/iomap.h>
> > +#include <linux/path.h>
> > +#include <linux/namei.h>
> > +#include <linux/string.h>
> > +
> > +#include "fuse_i.h"
> > +
> > +
> > +#define FMAP_BUFSIZE PAGE_SIZE
> > +
> > +int
> > +fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
> > +{
> > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > +	size_t fmap_bufsize = FMAP_BUFSIZE;
> > +	u64 nodeid = get_node_id(inode);
> > +	ssize_t fmap_size;
> > +	void *fmap_buf;
> > +	int rc;
> > +
> > +	FUSE_ARGS(args);
> > +
> > +	/* Don't retrieve if we already have the famfs metadata */
> > +	if (fi->famfs_meta)
> > +		return 0;
> > +
> > +	fmap_buf = kcalloc(1, FMAP_BUFSIZE, GFP_KERNEL);
> 
> If there is only ever 1, does kcalloc() make sense over kzalloc()?

Muscle memory? Good call, done.

> 
> > +	if (!fmap_buf)
> > +		return -EIO;
> > +
> > +	args.opcode = FUSE_GET_FMAP;
> > +	args.nodeid = nodeid;
> > +
> > +	/* Variable-sized output buffer
> > +	 * this causes fuse_simple_request() to return the size of the
> > +	 * output payload
> > +	 */
> > +	args.out_argvar = true;
> > +	args.out_numargs = 1;
> > +	args.out_args[0].size = fmap_bufsize;
> > +	args.out_args[0].value = fmap_buf;
> > +
> > +	/* Send GET_FMAP command */
> > +	rc = fuse_simple_request(fm, &args);
> > +	if (rc < 0) {
> > +		pr_err("%s: err=%d from fuse_simple_request()\n",
> > +		       __func__, rc);
> 
> Leaks the fmap_buf?  Maybe use a __free() so no need to keep track of htat.

Another good one - done.

> 
> 
> > +		return rc;
> > +	}
> > +	fmap_size = rc;
> > +
> > +	/* We retrieved the "fmap" (the file's map to memory), but
> > +	 * we haven't used it yet. A call to famfs_file_init_dax() will be added
> > +	 * here in a subsequent patch, when we add the ability to attach
> > +	 * fmaps to files.
> > +	 */
> > +
> > +	kfree(fmap_buf);
> > +	return 0;
> > +}
> 
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index 84d0ee2a501d..691c7850cf4e 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -223,6 +223,14 @@ struct fuse_inode {
> 
> >  
> > +static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi,
> > +						       void *meta)
> > +{
> > +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> > +	return xchg(&fi->famfs_meta, meta);
> > +#else
> > +	return NULL;
> > +#endif
> > +}
> > +
> > +static inline void famfs_meta_free(struct fuse_inode *fi)
> > +{
> > +	/* Stub wil be connected in a subsequent commit */
> > +}
> > +
> > +static inline int fuse_file_famfs(struct fuse_inode *fi)
> > +{
> > +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> > +	return (READ_ONCE(fi->famfs_meta) != NULL);
> > +#else
> > +	return 0;
> > +#endif
> > +}
> > +
> > +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
> > +int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode);
> > +#else
> > +static inline int
> > +fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
> > +{
> > +	return 0;
> > +}
> > +#endif
> I'd do a single block under one if IS_ENABLED() and then use an else
> for the stubs.   Should end up more readable.

OK, this sounds good, but it's rebase hell (oh, the humanity! :D). Multiple 
additional commits flesh out this stuff, and for now I'm giving up on that
rebase. I tried the flip, but I don't have all night (tonight), so I'm 
leaving it alone. 

I'll happily clean this up after the series is complete.

Thanks!
John