[PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax

John Groves posted 8 patches 2 weeks, 4 days ago
[PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax
Posted by John Groves 2 weeks, 4 days ago
The new fsdev driver provides pages/folios initialized compatibly with
fsdax - normal rather than devdax-style refcounting, and starting out
with order-0 folios.

When fsdev binds to a daxdev, it is usually (always?) switching from the
devdax mode (device.c), which pre-initializes compound folios according
to its alignment. Fsdev uses fsdev_clear_folio_state() to switch the
folios into a fsdax-compatible state.

A side effect of this is that raw mmap doesn't (can't?) work on an fsdev
dax instance. Accordingly, the fsdev driver does not provide raw mmap -
devices must be put in 'devdax' mode (drivers/dax/device.c) to get raw
mmap capability.

This commit contains just the framework, which remaps pages/folios
compatibly with fsdax.

Enabling dax changes:

- bus.h: add DAXDRV_FSDEV_TYPE driver type
- bus.c: allow DAXDRV_FSDEV_TYPE drivers to bind to daxdevs
- dax.h: prototype inode_dax() and dax_folio_reset_order(), which fsdev
  needs
- fs/dax.c: export dax_folio_reset_order() for use by fsdev

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Suggested-by: Gregory Price <gourry@gourry.net>
Signed-off-by: John Groves <john@groves.net>
---
 MAINTAINERS          |   8 ++
 drivers/dax/Makefile |   6 +
 drivers/dax/bus.c    |   4 +
 drivers/dax/bus.h    |   1 +
 drivers/dax/fsdev.c  | 253 +++++++++++++++++++++++++++++++++++++++++++
 fs/dax.c             |   1 +
 include/linux/dax.h  |   3 +
 7 files changed, 276 insertions(+)
 create mode 100644 drivers/dax/fsdev.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 96ea84948d76..e83cfcf7e932 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7298,6 +7298,14 @@ L:	linux-cxl@vger.kernel.org
 S:	Supported
 F:	drivers/dax/
 
+DEVICE DIRECT ACCESS (DAX) [fsdev_dax]
+M:	John Groves <jgroves@micron.com>
+M:	John Groves <John@Groves.net>
+L:	nvdimm@lists.linux.dev
+L:	linux-cxl@vger.kernel.org
+S:	Supported
+F:	drivers/dax/fsdev.c
+
 DEVICE FREQUENCY (DEVFREQ)
 M:	MyungJoo Ham <myungjoo.ham@samsung.com>
 M:	Kyungmin Park <kyungmin.park@samsung.com>
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 5ed5c39857c8..3bae252fd1bf 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -5,10 +5,16 @@ obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
 
+# fsdev_dax: fs-dax compatible devdax driver (needs DEV_DAX and FS_DAX)
+ifeq ($(CONFIG_FS_DAX),y)
+obj-$(CONFIG_DEV_DAX) += fsdev_dax.o
+endif
+
 dax-y := super.o
 dax-y += bus.o
 device_dax-y := device.o
 dax_pmem-y := pmem.o
 dax_cxl-y := cxl.o
+fsdev_dax-y := fsdev.o
 
 obj-y += hmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index e4bd5c9f006c..562e2b06f61a 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -81,6 +81,10 @@ static int dax_match_type(const struct dax_device_driver *dax_drv, struct device
 	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
 		return 1;
 
+	/* fsdev driver can also bind to device-type dax devices */
+	if (dax_drv->type == DAXDRV_FSDEV_TYPE && type == DAXDRV_DEVICE_TYPE)
+		return 1;
+
 	return 0;
 }
 
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index cbbf64443098..880bdf7e72d7 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -31,6 +31,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
 enum dax_driver_type {
 	DAXDRV_KMEM_TYPE,
 	DAXDRV_DEVICE_TYPE,
+	DAXDRV_FSDEV_TYPE,
 };
 
 struct dax_device_driver {
diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c
new file mode 100644
index 000000000000..e5b4396ce401
--- /dev/null
+++ b/drivers/dax/fsdev.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2026 Micron Technology, Inc. */
+#include <linux/memremap.h>
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include <linux/uio.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "dax-private.h"
+#include "bus.h"
+
+/*
+ * FS-DAX compatible devdax driver
+ *
+ * Unlike drivers/dax/device.c which pre-initializes compound folios based
+ * on device alignment (via vmemmap_shift), this driver leaves folios
+ * uninitialized similar to pmem. This allows fs-dax filesystems like famfs
+ * to work without needing special handling for pre-initialized folios.
+ *
+ * Key differences from device.c:
+ * - pgmap type is MEMORY_DEVICE_FS_DAX (not MEMORY_DEVICE_GENERIC)
+ * - vmemmap_shift is NOT set (folios remain order-0)
+ * - fs-dax can dynamically create compound folios as needed
+ * - No mmap support - all access is through fs-dax/iomap
+ */
+
+
+static void fsdev_cdev_del(void *cdev)
+{
+	cdev_del(cdev);
+}
+
+static void fsdev_kill(void *dev_dax)
+{
+	kill_dev_dax(dev_dax);
+}
+
+/*
+ * Page map operations for FS-DAX mode
+ * Similar to fsdax_pagemap_ops in drivers/nvdimm/pmem.c
+ *
+ * Note: folio_free callback is not needed for MEMORY_DEVICE_FS_DAX.
+ * The core mm code in free_zone_device_folio() handles the wake_up_var()
+ * directly for this memory type.
+ */
+static int fsdev_pagemap_memory_failure(struct dev_pagemap *pgmap,
+		unsigned long pfn, unsigned long nr_pages, int mf_flags)
+{
+	struct dev_dax *dev_dax = pgmap->owner;
+	u64 offset = PFN_PHYS(pfn) - dev_dax->ranges[0].range.start;
+	u64 len = nr_pages << PAGE_SHIFT;
+
+	return dax_holder_notify_failure(dev_dax->dax_dev, offset,
+					 len, mf_flags);
+}
+
+static const struct dev_pagemap_ops fsdev_pagemap_ops = {
+	.memory_failure		= fsdev_pagemap_memory_failure,
+};
+
+/*
+ * Clear any stale folio state from pages in the given range.
+ * This is necessary because device_dax pre-initializes compound folios
+ * based on vmemmap_shift, and that state may persist after driver unbind.
+ * Since fsdev_dax uses MEMORY_DEVICE_FS_DAX without vmemmap_shift, fs-dax
+ * expects to find clean order-0 folios that it can build into compound
+ * folios on demand.
+ *
+ * At probe time, no filesystem should be mounted yet, so all mappings
+ * are stale and must be cleared along with compound state.
+ */
+static void fsdev_clear_folio_state(struct dev_dax *dev_dax)
+{
+	for (int i = 0; i < dev_dax->nr_range; i++) {
+		struct range *range = &dev_dax->ranges[i].range;
+		unsigned long pfn = PHYS_PFN(range->start);
+		unsigned long end_pfn = PHYS_PFN(range->end) + 1;
+
+		while (pfn < end_pfn) {
+			struct folio *folio = pfn_folio(pfn);
+			int order = dax_folio_reset_order(folio);
+
+			pfn += 1UL << order;
+		}
+	}
+}
+
+static void fsdev_clear_folio_state_action(void *data)
+{
+	fsdev_clear_folio_state(data);
+}
+
+static int fsdev_open(struct inode *inode, struct file *filp)
+{
+	struct dax_device *dax_dev = inode_dax(inode);
+	struct dev_dax *dev_dax = dax_get_private(dax_dev);
+
+	filp->private_data = dev_dax;
+
+	return 0;
+}
+
+static int fsdev_release(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+static const struct file_operations fsdev_fops = {
+	.llseek = noop_llseek,
+	.owner = THIS_MODULE,
+	.open = fsdev_open,
+	.release = fsdev_release,
+};
+
+static int fsdev_dax_probe(struct dev_dax *dev_dax)
+{
+	struct dax_device *dax_dev = dev_dax->dax_dev;
+	struct device *dev = &dev_dax->dev;
+	struct dev_pagemap *pgmap;
+	u64 data_offset = 0;
+	struct inode *inode;
+	struct cdev *cdev;
+	void *addr;
+	int rc, i;
+
+	if (static_dev_dax(dev_dax))  {
+		if (dev_dax->nr_range > 1) {
+			dev_warn(dev, "static pgmap / multi-range device conflict\n");
+			return -EINVAL;
+		}
+
+		pgmap = dev_dax->pgmap;
+	} else {
+		size_t pgmap_size;
+
+		if (dev_dax->pgmap) {
+			dev_warn(dev, "dynamic-dax with pre-populated page map\n");
+			return -EINVAL;
+		}
+
+		pgmap_size = struct_size(pgmap, ranges, dev_dax->nr_range - 1);
+		pgmap = devm_kzalloc(dev, pgmap_size,  GFP_KERNEL);
+		if (!pgmap)
+			return -ENOMEM;
+
+		pgmap->nr_range = dev_dax->nr_range;
+		dev_dax->pgmap = pgmap;
+
+		for (i = 0; i < dev_dax->nr_range; i++) {
+			struct range *range = &dev_dax->ranges[i].range;
+
+			pgmap->ranges[i] = *range;
+		}
+	}
+
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct range *range = &dev_dax->ranges[i].range;
+
+		if (!devm_request_mem_region(dev, range->start,
+					range_len(range), dev_name(dev))) {
+			dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
+				 i, range->start, range->end);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * FS-DAX compatible mode: Use MEMORY_DEVICE_FS_DAX type and
+	 * do NOT set vmemmap_shift. This leaves folios at order-0,
+	 * allowing fs-dax to dynamically create compound folios as needed
+	 * (similar to pmem behavior).
+	 */
+	pgmap->type = MEMORY_DEVICE_FS_DAX;
+	pgmap->ops = &fsdev_pagemap_ops;
+	pgmap->owner = dev_dax;
+
+	/*
+	 * CRITICAL DIFFERENCE from device.c:
+	 * We do NOT set vmemmap_shift here, even if align > PAGE_SIZE.
+	 * This ensures folios remain order-0 and are compatible with
+	 * fs-dax's folio management.
+	 */
+
+	addr = devm_memremap_pages(dev, pgmap);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
+
+	/*
+	 * Clear any stale compound folio state left over from a previous
+	 * driver (e.g., device_dax with vmemmap_shift). Also register this
+	 * as a devm action so folio state is cleared on unbind, ensuring
+	 * clean pages for subsequent drivers (e.g., kmem for system-ram).
+	 */
+	fsdev_clear_folio_state(dev_dax);
+	rc = devm_add_action_or_reset(dev, fsdev_clear_folio_state_action,
+				      dev_dax);
+	if (rc)
+		return rc;
+
+	/* Detect whether the data is at a non-zero offset into the memory */
+	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
+		u64 phys = dev_dax->ranges[0].range.start;
+		u64 pgmap_phys = dev_dax->pgmap[0].range.start;
+
+		if (!WARN_ON(pgmap_phys > phys))
+			data_offset = phys - pgmap_phys;
+
+		pr_debug("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx\n",
+		       __func__, phys, pgmap_phys, data_offset);
+	}
+
+	inode = dax_inode(dax_dev);
+	cdev = inode->i_cdev;
+	cdev_init(cdev, &fsdev_fops);
+	cdev->owner = dev->driver->owner;
+	cdev_set_parent(cdev, &dev->kobj);
+	rc = cdev_add(cdev, dev->devt, 1);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, fsdev_cdev_del, cdev);
+	if (rc)
+		return rc;
+
+	run_dax(dax_dev);
+	return devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
+}
+
+static struct dax_device_driver fsdev_dax_driver = {
+	.probe = fsdev_dax_probe,
+	.type = DAXDRV_FSDEV_TYPE,
+};
+
+static int __init dax_init(void)
+{
+	return dax_driver_register(&fsdev_dax_driver);
+}
+
+static void __exit dax_exit(void)
+{
+	dax_driver_unregister(&fsdev_dax_driver);
+}
+
+MODULE_AUTHOR("John Groves");
+MODULE_DESCRIPTION("FS-DAX Device: fs-dax compatible devdax driver");
+MODULE_LICENSE("GPL");
+module_init(dax_init);
+module_exit(dax_exit);
+MODULE_ALIAS_DAX_DEVICE(0);
diff --git a/fs/dax.c b/fs/dax.c
index 7d7bbfb32c41..85a4b428e72b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -416,6 +416,7 @@ int dax_folio_reset_order(struct folio *folio)
 
 	return order;
 }
+EXPORT_SYMBOL_GPL(dax_folio_reset_order);
 
 static inline unsigned long dax_folio_put(struct folio *folio)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index bf103f317cac..996493f5c538 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -51,6 +51,7 @@ struct dax_holder_operations {
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+
 void *dax_holder(struct dax_device *dax_dev);
 void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
@@ -151,8 +152,10 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
 #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
 #if IS_ENABLED(CONFIG_FS_DAX)
+struct dax_device *inode_dax(struct inode *inode);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
+int dax_folio_reset_order(struct folio *folio);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
 struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-- 
2.53.0
Re: [PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax
Posted by Jonathan Cameron 2 weeks, 4 days ago
On Wed, 18 Mar 2026 20:28:37 -0500
John Groves <john@groves.net> wrote:

> The new fsdev driver provides pages/folios initialized compatibly with
> fsdax - normal rather than devdax-style refcounting, and starting out
> with order-0 folios.
> 
> When fsdev binds to a daxdev, it is usually (always?) switching from the
> devdax mode (device.c), which pre-initializes compound folios according
> to its alignment. Fsdev uses fsdev_clear_folio_state() to switch the
> folios into a fsdax-compatible state.
> 
> A side effect of this is that raw mmap doesn't (can't?) work on an fsdev
> dax instance. Accordingly, The fsdev driver does not provide raw mmap -
> devices must be put in 'devdax' mode (drivers/dax/device.c) to get raw
> mmap capability.
> 
> In this commit is just the framework, which remaps pages/folios compatibly
> with fsdax.
> 
> Enabling dax changes:
> 
> - bus.h: add DAXDRV_FSDEV_TYPE driver type
> - bus.c: allow DAXDRV_FSDEV_TYPE drivers to bind to daxdevs
> - dax.h: prototype inode_dax(), which fsdev needs
> 
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Suggested-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: John Groves <john@groves.net>

A few comments inline.  I think some of the code here could be moved
to a helper library used by both this and device.c

> ---
>  MAINTAINERS          |   8 ++
>  drivers/dax/Makefile |   6 +
>  drivers/dax/bus.c    |   4 +
>  drivers/dax/bus.h    |   1 +
>  drivers/dax/fsdev.c  | 253 +++++++++++++++++++++++++++++++++++++++++++
>  fs/dax.c             |   1 +
>  include/linux/dax.h  |   3 +
>  7 files changed, 276 insertions(+)
>  create mode 100644 drivers/dax/fsdev.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 96ea84948d76..e83cfcf7e932 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -7298,6 +7298,14 @@ L:	linux-cxl@vger.kernel.org
>  S:	Supported
>  F:	drivers/dax/
>  
> +DEVICE DIRECT ACCESS (DAX) [fsdev_dax]
> +M:	John Groves <jgroves@micron.com>
> +M:	John Groves <John@Groves.net>
> +L:	nvdimm@lists.linux.dev
> +L:	linux-cxl@vger.kernel.org
> +S:	Supported
> +F:	drivers/dax/fsdev.c
> +
>  DEVICE FREQUENCY (DEVFREQ)
>  M:	MyungJoo Ham <myungjoo.ham@samsung.com>
>  M:	Kyungmin Park <kyungmin.park@samsung.com>
> diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
> index 5ed5c39857c8..3bae252fd1bf 100644
> --- a/drivers/dax/Makefile
> +++ b/drivers/dax/Makefile
> @@ -5,10 +5,16 @@ obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
>  obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
>  obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
>  
> +# fsdev_dax: fs-dax compatible devdax driver (needs DEV_DAX and FS_DAX)
> +ifeq ($(CONFIG_FS_DAX),y)
> +obj-$(CONFIG_DEV_DAX) += fsdev_dax.o
> +endif

Why not throw in a new CONFIG_FSDAX_DEV and handle the dependencies
in Kconfig?  

> +
>  dax-y := super.o
>  dax-y += bus.o
>  device_dax-y := device.o
>  dax_pmem-y := pmem.o
>  dax_cxl-y := cxl.o
> +fsdev_dax-y := fsdev.o
>  
>  obj-y += hmem/

> diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c
> new file mode 100644
> index 000000000000..e5b4396ce401
> --- /dev/null
> +++ b/drivers/dax/fsdev.c

> +static int fsdev_dax_probe(struct dev_dax *dev_dax)
> +{
> +	struct dax_device *dax_dev = dev_dax->dax_dev;
> +	struct device *dev = &dev_dax->dev;
> +	struct dev_pagemap *pgmap;
> +	u64 data_offset = 0;

See below. I think you can usefully reduce the scope of this one.

> +	struct inode *inode;
> +	struct cdev *cdev;
> +	void *addr;
> +	int rc, i;
> +

There is a lot of duplication in here with dax/device.c
Is any of it suitable for shared helpers?

> +	if (static_dev_dax(dev_dax))  {
> +		if (dev_dax->nr_range > 1) {
> +			dev_warn(dev, "static pgmap / multi-range device conflict\n");
> +			return -EINVAL;
> +		}
> +
> +		pgmap = dev_dax->pgmap;
> +	} else {
> +		size_t pgmap_size;
> +
> +		if (dev_dax->pgmap) {
> +			dev_warn(dev, "dynamic-dax with pre-populated page map\n");
> +			return -EINVAL;
> +		}
> +
> +		pgmap_size = struct_size(pgmap, ranges, dev_dax->nr_range - 1);
> +		pgmap = devm_kzalloc(dev, pgmap_size,  GFP_KERNEL);

Bonus space before GFP_KERNEL.


> +		if (!pgmap)
> +			return -ENOMEM;
> +
> +		pgmap->nr_range = dev_dax->nr_range;
> +		dev_dax->pgmap = pgmap;
> +
> +		for (i = 0; i < dev_dax->nr_range; i++) {
> +			struct range *range = &dev_dax->ranges[i].range;
> +
> +			pgmap->ranges[i] = *range;
> +		}
> +	}
> +
> +	for (i = 0; i < dev_dax->nr_range; i++) {
> +		struct range *range = &dev_dax->ranges[i].range;
> +
> +		if (!devm_request_mem_region(dev, range->start,
> +					range_len(range), dev_name(dev))) {
> +			dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
> +				 i, range->start, range->end);
> +			return -EBUSY;
> +		}
> +	}

Everything above here is shared.  Some sort of _init() or similar library function
seems in order.

> +
> +	/*
> +	 * FS-DAX compatible mode: Use MEMORY_DEVICE_FS_DAX type and
> +	 * do NOT set vmemmap_shift. This leaves folios at order-0,
> +	 * allowing fs-dax to dynamically create compound folios as needed
> +	 * (similar to pmem behavior).
> +	 */
> +	pgmap->type = MEMORY_DEVICE_FS_DAX;
> +	pgmap->ops = &fsdev_pagemap_ops;
> +	pgmap->owner = dev_dax;
> +
> +	/*
> +	 * CRITICAL DIFFERENCE from device.c:
> +	 * We do NOT set vmemmap_shift here, even if align > PAGE_SIZE.
> +	 * This ensures folios remain order-0 and are compatible with
> +	 * fs-dax's folio management.
> +	 */
> +
> +	addr = devm_memremap_pages(dev, pgmap);
> +	if (IS_ERR(addr))
> +		return PTR_ERR(addr);
> +
> +	/*
> +	 * Clear any stale compound folio state left over from a previous
> +	 * driver (e.g., device_dax with vmemmap_shift). Also register this
> +	 * as a devm action so folio state is cleared on unbind, ensuring
> +	 * clean pages for subsequent drivers (e.g., kmem for system-ram).
> +	 */
> +	fsdev_clear_folio_state(dev_dax);
> +	rc = devm_add_action_or_reset(dev, fsdev_clear_folio_state_action,
> +				      dev_dax);
> +	if (rc)
> +		return rc;
> +
> +	/* Detect whether the data is at a non-zero offset into the memory */
> +	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
> +		u64 phys = dev_dax->ranges[0].range.start;
> +		u64 pgmap_phys = dev_dax->pgmap[0].range.start;
> +
> +		if (!WARN_ON(pgmap_phys > phys))
> +			data_offset = phys - pgmap_phys;
> +
> +		pr_debug("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx\n",
> +		       __func__, phys, pgmap_phys, data_offset);

Might change later, but at least at this point you could pull declaration of data_offset
into this scope.

> +	}
> +
> +	inode = dax_inode(dax_dev);
> +	cdev = inode->i_cdev;
> +	cdev_init(cdev, &fsdev_fops);
> +	cdev->owner = dev->driver->owner;
> +	cdev_set_parent(cdev, &dev->kobj);
> +	rc = cdev_add(cdev, dev->devt, 1);
> +	if (rc)
> +		return rc;
> +
> +	rc = devm_add_action_or_reset(dev, fsdev_cdev_del, cdev);
> +	if (rc)
> +		return rc;
> +
> +	run_dax(dax_dev);
> +	return devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
> +}

> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index bf103f317cac..996493f5c538 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -51,6 +51,7 @@ struct dax_holder_operations {
>  
>  #if IS_ENABLED(CONFIG_DAX)
>  struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
> +

Unrelated change.  Tidy this up for v9.


>  void *dax_holder(struct dax_device *dax_dev);
>  void put_dax(struct dax_device *dax_dev);
>  void kill_dax(struct dax_device *dax_dev);
> @@ -151,8 +152,10 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
>  #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
>  
>  #if IS_ENABLED(CONFIG_FS_DAX)
> +struct dax_device *inode_dax(struct inode *inode);

Already in dax-private.h, so why does it want to be here?


>  int dax_writeback_mapping_range(struct address_space *mapping,
>  		struct dax_device *dax_dev, struct writeback_control *wbc);
> +int dax_folio_reset_order(struct folio *folio);
>  
>  struct page *dax_layout_busy_page(struct address_space *mapping);
>  struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
Re: [PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax
Posted by John Groves 2 weeks, 2 days ago
On 26/03/19 12:20PM, Jonathan Cameron wrote:
> On Wed, 18 Mar 2026 20:28:37 -0500
> John Groves <john@groves.net> wrote:
> 
> > The new fsdev driver provides pages/folios initialized compatibly with
> > fsdax - normal rather than devdax-style refcounting, and starting out
> > with order-0 folios.
> > 
> > When fsdev binds to a daxdev, it is usually (always?) switching from the
> > devdax mode (device.c), which pre-initializes compound folios according
> > to its alignment. Fsdev uses fsdev_clear_folio_state() to switch the
> > folios into a fsdax-compatible state.
> > 
> > A side effect of this is that raw mmap doesn't (can't?) work on an fsdev
> > dax instance. Accordingly, The fsdev driver does not provide raw mmap -
> > devices must be put in 'devdax' mode (drivers/dax/device.c) to get raw
> > mmap capability.
> > 
> > In this commit is just the framework, which remaps pages/folios compatibly
> > with fsdax.
> > 
> > Enabling dax changes:
> > 
> > - bus.h: add DAXDRV_FSDEV_TYPE driver type
> > - bus.c: allow DAXDRV_FSDEV_TYPE drivers to bind to daxdevs
> > - dax.h: prototype inode_dax(), which fsdev needs
> > 
> > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > Suggested-by: Gregory Price <gourry@gourry.net>
> > Signed-off-by: John Groves <john@groves.net>
> 
> A few comments inline.  I think some of the code here could be moved
> to a helper library used by both this and device.c
> 
> > ---
> >  MAINTAINERS          |   8 ++
> >  drivers/dax/Makefile |   6 +
> >  drivers/dax/bus.c    |   4 +
> >  drivers/dax/bus.h    |   1 +
> >  drivers/dax/fsdev.c  | 253 +++++++++++++++++++++++++++++++++++++++++++
> >  fs/dax.c             |   1 +
> >  include/linux/dax.h  |   3 +
> >  7 files changed, 276 insertions(+)
> >  create mode 100644 drivers/dax/fsdev.c
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 96ea84948d76..e83cfcf7e932 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -7298,6 +7298,14 @@ L:	linux-cxl@vger.kernel.org
> >  S:	Supported
> >  F:	drivers/dax/
> >  
> > +DEVICE DIRECT ACCESS (DAX) [fsdev_dax]
> > +M:	John Groves <jgroves@micron.com>
> > +M:	John Groves <John@Groves.net>
> > +L:	nvdimm@lists.linux.dev
> > +L:	linux-cxl@vger.kernel.org
> > +S:	Supported
> > +F:	drivers/dax/fsdev.c
> > +
> >  DEVICE FREQUENCY (DEVFREQ)
> >  M:	MyungJoo Ham <myungjoo.ham@samsung.com>
> >  M:	Kyungmin Park <kyungmin.park@samsung.com>
> > diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
> > index 5ed5c39857c8..3bae252fd1bf 100644
> > --- a/drivers/dax/Makefile
> > +++ b/drivers/dax/Makefile
> > @@ -5,10 +5,16 @@ obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
> >  obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
> >  obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
> >  
> > +# fsdev_dax: fs-dax compatible devdax driver (needs DEV_DAX and FS_DAX)
> > +ifeq ($(CONFIG_FS_DAX),y)
> > +obj-$(CONFIG_DEV_DAX) += fsdev_dax.o
> > +endif
> 
> Why not throw in a new CONFIG_FSDAX_DEV and handle the dependencies
> in Kconfig?  

At one point I had another config parameter, but I'm trying not to
gratuitously add them. The fsdev driver is pretty small, and including it
whenever FS_DAX is enabled felt reasonable to me. I'm willing to change it
if there's a consensus that way.

> 
> > +
> >  dax-y := super.o
> >  dax-y += bus.o
> >  device_dax-y := device.o
> >  dax_pmem-y := pmem.o
> >  dax_cxl-y := cxl.o
> > +fsdev_dax-y := fsdev.o
> >  
> >  obj-y += hmem/
> 
> > diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c
> > new file mode 100644
> > index 000000000000..e5b4396ce401
> > --- /dev/null
> > +++ b/drivers/dax/fsdev.c
> 
> > +static int fsdev_dax_probe(struct dev_dax *dev_dax)
> > +{
> > +	struct dax_device *dax_dev = dev_dax->dax_dev;
> > +	struct device *dev = &dev_dax->dev;
> > +	struct dev_pagemap *pgmap;
> > +	u64 data_offset = 0;
> 
> See below. I think you can useful reduce scope of this one.

As of now, I've reduced the scope, but in the very next commit it needs to
move back here. So meh... not sure that's worth it for one commit.

> 
> > +	struct inode *inode;
> > +	struct cdev *cdev;
> > +	void *addr;
> > +	int rc, i;
> > +
> 
> There is a lot of duplication in here with dax/device.c
> Is any of it suitable for shared helpers?

I haven't addressed factoring out more duplicated code yet. Ideally I'd like
to do that after the initial merge, but I'm paying attention to whether 
there's pressure to do it.

> 
> > +	if (static_dev_dax(dev_dax))  {
> > +		if (dev_dax->nr_range > 1) {
> > +			dev_warn(dev, "static pgmap / multi-range device conflict\n");
> > +			return -EINVAL;
> > +		}
> > +
> > +		pgmap = dev_dax->pgmap;
> > +	} else {
> > +		size_t pgmap_size;
> > +
> > +		if (dev_dax->pgmap) {
> > +			dev_warn(dev, "dynamic-dax with pre-populated page map\n");
> > +			return -EINVAL;
> > +		}
> > +
> > +		pgmap_size = struct_size(pgmap, ranges, dev_dax->nr_range - 1);
> > +		pgmap = devm_kzalloc(dev, pgmap_size,  GFP_KERNEL);
> 
> Bonus space before GFP_KERNEL.

Excised, thanks

> 
> 
> > +		if (!pgmap)
> > +			return -ENOMEM;
> > +
> > +		pgmap->nr_range = dev_dax->nr_range;
> > +		dev_dax->pgmap = pgmap;
> > +
> > +		for (i = 0; i < dev_dax->nr_range; i++) {
> > +			struct range *range = &dev_dax->ranges[i].range;
> > +
> > +			pgmap->ranges[i] = *range;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < dev_dax->nr_range; i++) {
> > +		struct range *range = &dev_dax->ranges[i].range;
> > +
> > +		if (!devm_request_mem_region(dev, range->start,
> > +					range_len(range), dev_name(dev))) {
> > +			dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
> > +				 i, range->start, range->end);
> > +			return -EBUSY;
> > +		}
> > +	}
> 
> Everything above here is shared.  Some sort of _init() or similar library function
> seems in order.

Taken under advisement. Will look at this soon.

> 
> > +
> > +	/*
> > +	 * FS-DAX compatible mode: Use MEMORY_DEVICE_FS_DAX type and
> > +	 * do NOT set vmemmap_shift. This leaves folios at order-0,
> > +	 * allowing fs-dax to dynamically create compound folios as needed
> > +	 * (similar to pmem behavior).
> > +	 */
> > +	pgmap->type = MEMORY_DEVICE_FS_DAX;
> > +	pgmap->ops = &fsdev_pagemap_ops;
> > +	pgmap->owner = dev_dax;
> > +
> > +	/*
> > +	 * CRITICAL DIFFERENCE from device.c:
> > +	 * We do NOT set vmemmap_shift here, even if align > PAGE_SIZE.
> > +	 * This ensures folios remain order-0 and are compatible with
> > +	 * fs-dax's folio management.
> > +	 */
> > +
> > +	addr = devm_memremap_pages(dev, pgmap);
> > +	if (IS_ERR(addr))
> > +		return PTR_ERR(addr);
> > +
> > +	/*
> > +	 * Clear any stale compound folio state left over from a previous
> > +	 * driver (e.g., device_dax with vmemmap_shift). Also register this
> > +	 * as a devm action so folio state is cleared on unbind, ensuring
> > +	 * clean pages for subsequent drivers (e.g., kmem for system-ram).
> > +	 */
> > +	fsdev_clear_folio_state(dev_dax);
> > +	rc = devm_add_action_or_reset(dev, fsdev_clear_folio_state_action,
> > +				      dev_dax);
> > +	if (rc)
> > +		return rc;
> > +
> > +	/* Detect whether the data is at a non-zero offset into the memory */
> > +	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
> > +		u64 phys = dev_dax->ranges[0].range.start;
> > +		u64 pgmap_phys = dev_dax->pgmap[0].range.start;
> > +
> > +		if (!WARN_ON(pgmap_phys > phys))
> > +			data_offset = phys - pgmap_phys;
> > +
> > +		pr_debug("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx\n",
> > +		       __func__, phys, pgmap_phys, data_offset);
> 
> Might change later, but at least at this point you could pull declaration of data_offset
> into this scope.

done as of now, but it's used right after the closing brace of this block
in the very next commit.

> 
> > +	}
> > +
> > +	inode = dax_inode(dax_dev);
> > +	cdev = inode->i_cdev;
> > +	cdev_init(cdev, &fsdev_fops);
> > +	cdev->owner = dev->driver->owner;
> > +	cdev_set_parent(cdev, &dev->kobj);
> > +	rc = cdev_add(cdev, dev->devt, 1);
> > +	if (rc)
> > +		return rc;
> > +
> > +	rc = devm_add_action_or_reset(dev, fsdev_cdev_del, cdev);
> > +	if (rc)
> > +		return rc;
> > +
> > +	run_dax(dax_dev);
> > +	return devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
> > +}
> 
> > diff --git a/include/linux/dax.h b/include/linux/dax.h
> > index bf103f317cac..996493f5c538 100644
> > --- a/include/linux/dax.h
> > +++ b/include/linux/dax.h
> > @@ -51,6 +51,7 @@ struct dax_holder_operations {
> >  
> >  #if IS_ENABLED(CONFIG_DAX)
> >  struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
> > +
> 
> Unrelated change.  Tidy this up for v9.

Spurious blank line dropped - thanks

> 
> 
> >  void *dax_holder(struct dax_device *dax_dev);
> >  void put_dax(struct dax_device *dax_dev);
> >  void kill_dax(struct dax_device *dax_dev);
> > @@ -151,8 +152,10 @@ static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
> >  #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
> >  
> >  #if IS_ENABLED(CONFIG_FS_DAX)
> > +struct dax_device *inode_dax(struct inode *inode);
> 
> Already in dax_private.h so why does it want to be here?

Indeed, thanks!

Regards,
John
Re: [PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax
Posted by Jonathan Cameron 2 weeks ago
> > > diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
> > > index 5ed5c39857c8..3bae252fd1bf 100644
> > > --- a/drivers/dax/Makefile
> > > +++ b/drivers/dax/Makefile
> > > @@ -5,10 +5,16 @@ obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
> > >  obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
> > >  obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
> > >  
> > > +# fsdev_dax: fs-dax compatible devdax driver (needs DEV_DAX and FS_DAX)
> > > +ifeq ($(CONFIG_FS_DAX),y)
> > > +obj-$(CONFIG_DEV_DAX) += fsdev_dax.o
> > > +endif  
> > 
> > Why not throw in a new CONFIG_FSDAX_DEV and handle the dependencies
> > in Kconfig?    
> 
> At one point I had another config parameter, but I'm trying not to
> gratuitously add them. The fsdev driver is pretty small, and including it
> whenever FS_DAX is enabled felt reasonable to me. I'm willing to change it
> if there's a consensus that way.

You can make the build do exactly the same thing with a separate Kconfig
option. Just moves where the dependency tracking is. I'd prefer Kconfig
because that's generally where I'd look for something like this.


Jonathan
Re: [PATCH V8 3/8] dax: add fsdev.c driver for fs-dax on character dax
Posted by John Groves 1 week, 6 days ago
On 26/03/23 12:12PM, Jonathan Cameron wrote:
> 
> > > > diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
> > > > index 5ed5c39857c8..3bae252fd1bf 100644
> > > > --- a/drivers/dax/Makefile
> > > > +++ b/drivers/dax/Makefile
> > > > @@ -5,10 +5,16 @@ obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
> > > >  obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
> > > >  obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
> > > >  
> > > > +# fsdev_dax: fs-dax compatible devdax driver (needs DEV_DAX and FS_DAX)
> > > > +ifeq ($(CONFIG_FS_DAX),y)
> > > > +obj-$(CONFIG_DEV_DAX) += fsdev_dax.o
> > > > +endif  
> > > 
> > > Why not throw in a new CONFIG_FSDAX_DEV and handle the dependencies
> > > in Kconfig?    
> > 
> > At one point I had another config parameter, but I'm trying not to
> > gratuitously add them. The fsdev driver is pretty small, and including it
> > whenever FS_DAX is enabled felt reasonable to me. I'm willing to change it
> > if there's a consensus that way.
> 
> You can make the build do exactly the same thing with a separate Kconfig
> option. Just moves where the dependency tracking is. I'd prefer Kconfig
> because that's generally where I'd look for something like this.
> 
> 
> Jonathan

OK, will do. It will be CONFIG_DEV_DAX_FSDEV for naming consistency.

V9 coming within 24 hours...

John