From: Minchan Kim <minchan@google.com>
This patch introduces GCMA (Guaranteed Contiguous Memory Allocator), a
cleancache backend which reserves memory at boot and donates it to
cleancache for storing clean file-backed pages.
GCMA aims to guarantee contiguous memory allocation success as well as
low and deterministic allocation latency.
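A minimal usage sketch (hypothetical caller; the "example" name and the
PFN range are illustrative and would normally come from a boot-time
reservation such as a reserved-memory node):

	#include <linux/gcma.h>
	#include <linux/gfp.h>

	/*
	 * Hypothetical: donate a boot-time reserved range, then carve
	 * out a 16-page compound allocation from it and free it again.
	 */
	static int gcma_example(unsigned long area_pfn, unsigned long area_count)
	{
		int err;

		/* Donate the whole reserved range to cleancache via GCMA. */
		err = gcma_register_area("example", area_pfn, area_count);
		if (err)
			return err;

		/* Claim back 16 contiguous pages as one compound page. */
		err = gcma_alloc_range(area_pfn, 16, GFP_KERNEL | __GFP_COMP);
		if (err)
			return err;

		/* ... use pfn_folio(area_pfn) ... */

		/* Return the pages to cleancache. */
		return gcma_free_range(area_pfn, 16);
	}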
Notes:
Originally, the idea was posted by SeongJae Park and Minchan Kim [1].
Later Minchan reworked it to be used in Android as a reference for
Android vendors to use [2].
[1] https://lwn.net/Articles/619865/
[2] https://android-review.googlesource.com/q/topic:%22gcma_6.12%22
Signed-off-by: Minchan Kim <minchan@google.com>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
MAINTAINERS | 2 +
include/linux/gcma.h | 36 +++++++
mm/Kconfig | 15 +++
mm/Makefile | 1 +
mm/gcma.c | 231 +++++++++++++++++++++++++++++++++++++++++++
5 files changed, 285 insertions(+)
create mode 100644 include/linux/gcma.h
create mode 100644 mm/gcma.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 441e68c94177..95b5ad26ec11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16361,6 +16361,7 @@ F: Documentation/admin-guide/mm/
F: Documentation/mm/
F: include/linux/cma.h
F: include/linux/dmapool.h
+F: include/linux/gcma.h
F: include/linux/ioremap.h
F: include/linux/memory-tiers.h
F: include/linux/page_idle.h
@@ -16372,6 +16373,7 @@ F: mm/dmapool.c
F: mm/dmapool_test.c
F: mm/early_ioremap.c
F: mm/fadvise.c
+F: mm/gcma.c
F: mm/ioremap.c
F: mm/mapping_dirty_helpers.c
F: mm/memory-tiers.c
diff --git a/include/linux/gcma.h b/include/linux/gcma.h
new file mode 100644
index 000000000000..20b2c85de87b
--- /dev/null
+++ b/include/linux/gcma.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GCMA_H__
+#define __GCMA_H__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_GCMA
+
+int gcma_register_area(const char *name,
+ unsigned long start_pfn, unsigned long count);
+
+/*
+ * NOTE: allocated pages are still marked reserved and when freeing them
+ * the caller should ensure they are isolated and not referenced by anyone
+ * other than the caller.
+ */
+int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp);
+int gcma_free_range(unsigned long start_pfn, unsigned long count);
+
+#else /* CONFIG_GCMA */
+
+static inline int gcma_register_area(const char *name,
+ unsigned long start_pfn,
+ unsigned long count)
+ { return -EOPNOTSUPP; }
+static inline int gcma_alloc_range(unsigned long start_pfn,
+ unsigned long count, gfp_t gfp)
+ { return -EOPNOTSUPP; }
+
+static inline int gcma_free_range(unsigned long start_pfn,
+ unsigned long count)
+ { return -EOPNOTSUPP; }
+
+#endif /* CONFIG_GCMA */
+
+#endif /* __GCMA_H__ */
diff --git a/mm/Kconfig b/mm/Kconfig
index 9f4da8a848f4..41ce5ef8db55 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1013,6 +1013,21 @@ config CMA_AREAS
If unsure, leave the default value "8" in UMA and "20" in NUMA.
+config GCMA
+ bool "GCMA (Guaranteed Contiguous Memory Allocator)"
+ depends on CLEANCACHE
+ help
+ This enables the Guaranteed Contiguous Memory Allocator, which
+ provides low latency, guaranteed contiguous memory allocations.
+ Memory reserved by GCMA is donated to cleancache to be used as
+ a pagecache extension. When a GCMA allocation is requested, the
+ necessary pages are taken back from the cleancache and used to
+ satisfy the request. GCMA guarantees low latency and successful
+ allocation as long as the total size of GCMA allocations does
+ not exceed the size of the memory donated to the cleancache.
+
+ If unsure, say "N".
+
#
# Select this config option from the architecture Kconfig, if available, to set
# the max page order for physically contiguous allocations.
diff --git a/mm/Makefile b/mm/Makefile
index 845841a140e3..05aee66a8b07 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -149,3 +149,4 @@ obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_CLEANCACHE_SYSFS) += cleancache_sysfs.o
+obj-$(CONFIG_GCMA) += gcma.o
diff --git a/mm/gcma.c b/mm/gcma.c
new file mode 100644
index 000000000000..3ee0e1340db3
--- /dev/null
+++ b/mm/gcma.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GCMA (Guaranteed Contiguous Memory Allocator)
+ *
+ */
+
+#define pr_fmt(fmt) "gcma: " fmt
+
+#include <linux/cleancache.h>
+#include <linux/gcma.h>
+#include <linux/hashtable.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include "internal.h"
+
+#define MAX_GCMA_AREAS 64
+#define GCMA_AREA_NAME_MAX_LEN 32
+
+struct gcma_area {
+ int pool_id;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ char name[GCMA_AREA_NAME_MAX_LEN];
+};
+
+static struct gcma_area areas[MAX_GCMA_AREAS];
+static atomic_t nr_gcma_area = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(gcma_area_lock);
+
+static int free_folio_range(struct gcma_area *area,
+ unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned long scanned = 0;
+ struct folio *folio;
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ int err;
+
+ if (!(++scanned % XA_CHECK_SCHED))
+ cond_resched();
+
+ folio = pfn_folio(pfn);
+ err = cleancache_backend_put_folio(area->pool_id, folio);
+ if (WARN(err, "PFN %lu: folio is still in use\n", pfn))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int alloc_folio_range(struct gcma_area *area,
+ unsigned long start_pfn, unsigned long end_pfn,
+ gfp_t gfp)
+{
+ unsigned long scanned = 0;
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ int err;
+
+ if (!(++scanned % XA_CHECK_SCHED))
+ cond_resched();
+
+ err = cleancache_backend_get_folio(area->pool_id, pfn_folio(pfn));
+ if (err) {
+ free_folio_range(area, start_pfn, pfn);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static struct gcma_area *find_area(unsigned long start_pfn, unsigned long end_pfn)
+{
+ int nr_area = atomic_read_acquire(&nr_gcma_area);
+ int i;
+
+ for (i = 0; i < nr_area; i++) {
+ struct gcma_area *area = &areas[i];
+
+ if (area->end_pfn <= start_pfn)
+ continue;
+
+ if (area->start_pfn > end_pfn)
+ continue;
+
+ /* The entire range should belong to a single area */
+ if (start_pfn < area->start_pfn || end_pfn > area->end_pfn)
+ break;
+
+ /* Found the area containing the entire range */
+ return area;
+ }
+
+ return NULL;
+}
+
+int gcma_register_area(const char *name,
+ unsigned long start_pfn, unsigned long count)
+{
+ LIST_HEAD(folios);
+ int i, pool_id;
+ int nr_area;
+ int ret = 0;
+
+ pool_id = cleancache_backend_register_pool(name);
+ if (pool_id < 0)
+ return pool_id;
+
+ for (i = 0; i < count; i++) {
+ struct folio *folio;
+
+ folio = pfn_folio(start_pfn + i);
+ folio_clear_reserved(folio);
+ folio_set_count(folio, 0);
+ list_add(&folio->lru, &folios);
+ }
+
+ cleancache_backend_put_folios(pool_id, &folios);
+
+ spin_lock(&gcma_area_lock);
+
+ nr_area = atomic_read(&nr_gcma_area);
+ if (nr_area < MAX_GCMA_AREAS) {
+ struct gcma_area *area = &areas[nr_area];
+
+ area->pool_id = pool_id;
+ area->start_pfn = start_pfn;
+ area->end_pfn = start_pfn + count;
+ strscpy(area->name, name);
+ /* Ensure above stores complete before we increase the count */
+ atomic_set_release(&nr_gcma_area, nr_area + 1);
+ } else {
+ ret = -ENOMEM;
+ }
+
+ spin_unlock(&gcma_area_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gcma_register_area);
+
+int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp)
+{
+ unsigned long end_pfn = start_pfn + count;
+ struct gcma_area *area;
+ struct folio *folio;
+ int err, order = 0;
+
+ gfp = current_gfp_context(gfp);
+ if (gfp & __GFP_COMP) {
+ if (!is_power_of_2(count))
+ return -EINVAL;
+
+ order = ilog2(count);
+ if (order >= MAX_PAGE_ORDER)
+ return -EINVAL;
+ }
+
+ area = find_area(start_pfn, end_pfn);
+ if (!area)
+ return -EINVAL;
+
+ err = alloc_folio_range(area, start_pfn, end_pfn, gfp);
+ if (err)
+ return err;
+
+ /*
+ * GCMA returns pages with refcount 1 and expects them to have
+ * the same refcount 1 when they are freed.
+ */
+ if (order) {
+ folio = pfn_folio(start_pfn);
+ set_page_count(&folio->page, 1);
+ prep_compound_page(&folio->page, order);
+ } else {
+ for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
+ folio = pfn_folio(pfn);
+ set_page_count(&folio->page, 1);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gcma_alloc_range);
+
+int gcma_free_range(unsigned long start_pfn, unsigned long count)
+{
+ unsigned long end_pfn = start_pfn + count;
+ struct gcma_area *area;
+ struct folio *folio;
+
+ area = find_area(start_pfn, end_pfn);
+ if (!area)
+ return -EINVAL;
+
+ folio = pfn_folio(start_pfn);
+ if (folio_test_large(folio)) {
+ int expected = folio_nr_pages(folio);
+
+ if (WARN(count != expected, "PFN %lu: count %lu != expected %d\n",
+ start_pfn, count, expected))
+ return -EINVAL;
+
+ if (WARN(!folio_ref_dec_and_test(folio),
+ "PFN %lu: invalid folio refcount when freeing\n", start_pfn))
+ return -EINVAL;
+
+ free_pages_prepare(&folio->page, folio_order(folio));
+ } else {
+ for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
+ folio = pfn_folio(pfn);
+ if (folio_nr_pages(folio) == 1)
+ count--;
+
+ if (WARN(!folio_ref_dec_and_test(folio),
+ "PFN %lu: invalid folio refcount when freeing\n", pfn))
+ return -EINVAL;
+
+ free_pages_prepare(&folio->page, 0);
+ }
+ WARN(count != 0, "%lu pages are still in use!\n", count);
+ }
+
+ return free_folio_range(area, start_pfn, end_pfn);
+}
+EXPORT_SYMBOL_GPL(gcma_free_range);
--
2.51.0.740.g6adb054d12-goog
Hello Suren,
On Thu, 9 Oct 2025 18:19:50 -0700 Suren Baghdasaryan <surenb@google.com> wrote:
> From: Minchan Kim <minchan@google.com>
>
> This patch introduces GCMA (Guaranteed Contiguous Memory Allocator), a
> cleancache backend which reserves memory at boot and donates it to
> cleancache for storing clean file-backed pages.
> GCMA aims to guarantee contiguous memory allocation success as well as
> low and deterministic allocation latency.
>
> Notes:
> Originally, the idea was posted by SeongJae Park and Minchan Kim [1].
> Later Minchan reworked it to be used in Android as a reference for
> Android vendors to use [2].
>
> [1] https://lwn.net/Articles/619865/
> [2] https://android-review.googlesource.com/q/topic:%22gcma_6.12%22
>
> Signed-off-by: Minchan Kim <minchan@google.com>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
> MAINTAINERS | 2 +
> include/linux/gcma.h | 36 +++++++
> mm/Kconfig | 15 +++
> mm/Makefile | 1 +
> mm/gcma.c | 231 +++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 285 insertions(+)
> create mode 100644 include/linux/gcma.h
> create mode 100644 mm/gcma.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 441e68c94177..95b5ad26ec11 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -16361,6 +16361,7 @@ F: Documentation/admin-guide/mm/
> F: Documentation/mm/
> F: include/linux/cma.h
> F: include/linux/dmapool.h
> +F: include/linux/gcma.h
> F: include/linux/ioremap.h
> F: include/linux/memory-tiers.h
> F: include/linux/page_idle.h
> @@ -16372,6 +16373,7 @@ F: mm/dmapool.c
> F: mm/dmapool_test.c
> F: mm/early_ioremap.c
> F: mm/fadvise.c
> +F: mm/gcma.c
> F: mm/ioremap.c
> F: mm/mapping_dirty_helpers.c
> F: mm/memory-tiers.c
> diff --git a/include/linux/gcma.h b/include/linux/gcma.h
> new file mode 100644
> index 000000000000..20b2c85de87b
> --- /dev/null
> +++ b/include/linux/gcma.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __GCMA_H__
> +#define __GCMA_H__
> +
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_GCMA
> +
> +int gcma_register_area(const char *name,
> + unsigned long start_pfn, unsigned long count);
> +
> +/*
> + * NOTE: allocated pages are still marked reserved and when freeing them
> + * the caller should ensure they are isolated and not referenced by anyone
> + * other than the caller.
> + */
> +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp);
> +int gcma_free_range(unsigned long start_pfn, unsigned long count);
> +
> +#else /* CONFIG_GCMA */
> +
> +static inline int gcma_register_area(const char *name,
> + unsigned long start_pfn,
> + unsigned long count)
> + { return -EOPNOTSUPP; }
> +static inline int gcma_alloc_range(unsigned long start_pfn,
> + unsigned long count, gfp_t gfp)
> + { return -EOPNOTSUPP; }
> +
> +static inline int gcma_free_range(unsigned long start_pfn,
> + unsigned long count)
> + { return -EOPNOTSUPP; }
> +
> +#endif /* CONFIG_GCMA */
> +
> +#endif /* __GCMA_H__ */
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 9f4da8a848f4..41ce5ef8db55 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -1013,6 +1013,21 @@ config CMA_AREAS
>
> If unsure, leave the default value "8" in UMA and "20" in NUMA.
>
> +config GCMA
> + bool "GCMA (Guaranteed Contiguous Memory Allocator)"
> + depends on CLEANCACHE
> + help
> + This enables the Guaranteed Contiguous Memory Allocator, which
> + provides low latency, guaranteed contiguous memory allocations.
> + Memory reserved by GCMA is donated to cleancache to be used as
> + a pagecache extension. When a GCMA allocation is requested, the
> + necessary pages are taken back from the cleancache and used to
> + satisfy the request. GCMA guarantees low latency and successful
> + allocation as long as the total size of GCMA allocations does
> + not exceed the size of the memory donated to the cleancache.
> +
> + If unsure, say "N".
> +
> #
> # Select this config option from the architecture Kconfig, if available, to set
> # the max page order for physically contiguous allocations.
> diff --git a/mm/Makefile b/mm/Makefile
> index 845841a140e3..05aee66a8b07 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -149,3 +149,4 @@ obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
> obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
> obj-$(CONFIG_CLEANCACHE) += cleancache.o
> obj-$(CONFIG_CLEANCACHE_SYSFS) += cleancache_sysfs.o
> +obj-$(CONFIG_GCMA) += gcma.o
> diff --git a/mm/gcma.c b/mm/gcma.c
> new file mode 100644
> index 000000000000..3ee0e1340db3
> --- /dev/null
> +++ b/mm/gcma.c
> @@ -0,0 +1,231 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * GCMA (Guaranteed Contiguous Memory Allocator)
> + *
> + */
> +
> +#define pr_fmt(fmt) "gcma: " fmt
> +
> +#include <linux/cleancache.h>
> +#include <linux/gcma.h>
> +#include <linux/hashtable.h>
> +#include <linux/highmem.h>
> +#include <linux/idr.h>
> +#include <linux/slab.h>
> +#include <linux/xarray.h>
> +#include "internal.h"
> +
> +#define MAX_GCMA_AREAS 64
> +#define GCMA_AREA_NAME_MAX_LEN 32
> +
> +struct gcma_area {
> + int pool_id;
> + unsigned long start_pfn;
> + unsigned long end_pfn;
> + char name[GCMA_AREA_NAME_MAX_LEN];
> +};
> +
> +static struct gcma_area areas[MAX_GCMA_AREAS];
> +static atomic_t nr_gcma_area = ATOMIC_INIT(0);
> +static DEFINE_SPINLOCK(gcma_area_lock);
> +
> +static int free_folio_range(struct gcma_area *area,
> + unsigned long start_pfn, unsigned long end_pfn)
> +{
> + unsigned long scanned = 0;
> + struct folio *folio;
> + unsigned long pfn;
> +
> + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> + int err;
> +
> + if (!(++scanned % XA_CHECK_SCHED))
> + cond_resched();
> +
> + folio = pfn_folio(pfn);
> + err = cleancache_backend_put_folio(area->pool_id, folio);
Why don't you use pfn_folio() directly, like alloc_folio_range() does?
> + if (WARN(err, "PFN %lu: folio is still in use\n", pfn))
> + return -EINVAL;
Why don't you return err, like alloc_folio_range() does?
> + }
> +
> + return 0;
> +}
> +
> +static int alloc_folio_range(struct gcma_area *area,
> + unsigned long start_pfn, unsigned long end_pfn,
> + gfp_t gfp)
> +{
> + unsigned long scanned = 0;
> + unsigned long pfn;
> +
> + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> + int err;
> +
> + if (!(++scanned % XA_CHECK_SCHED))
> + cond_resched();
> +
> + err = cleancache_backend_get_folio(area->pool_id, pfn_folio(pfn));
> + if (err) {
> + free_folio_range(area, start_pfn, pfn);
> + return err;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static struct gcma_area *find_area(unsigned long start_pfn, unsigned long end_pfn)
> +{
> + int nr_area = atomic_read_acquire(&nr_gcma_area);
> + int i;
> +
> + for (i = 0; i < nr_area; i++) {
> + struct gcma_area *area = &areas[i];
> +
> + if (area->end_pfn <= start_pfn)
> + continue;
> +
> + if (area->start_pfn > end_pfn)
> + continue;
> +
> + /* The entire range should belong to a single area */
> + if (start_pfn < area->start_pfn || end_pfn > area->end_pfn)
> + break;
> +
> + /* Found the area containing the entire range */
> + return area;
> + }
> +
> + return NULL;
> +}
> +
> +int gcma_register_area(const char *name,
> + unsigned long start_pfn, unsigned long count)
> +{
> + LIST_HEAD(folios);
> + int i, pool_id;
> + int nr_area;
> + int ret = 0;
> +
> + pool_id = cleancache_backend_register_pool(name);
> + if (pool_id < 0)
> + return pool_id;
> +
> + for (i = 0; i < count; i++) {
> + struct folio *folio;
> +
> + folio = pfn_folio(start_pfn + i);
> + folio_clear_reserved(folio);
> + folio_set_count(folio, 0);
> + list_add(&folio->lru, &folios);
> + }
> +
> + cleancache_backend_put_folios(pool_id, &folios);
> +
> + spin_lock(&gcma_area_lock);
> +
> + nr_area = atomic_read(&nr_gcma_area);
> + if (nr_area < MAX_GCMA_AREAS) {
> + struct gcma_area *area = &areas[nr_area];
> +
> + area->pool_id = pool_id;
> + area->start_pfn = start_pfn;
> + area->end_pfn = start_pfn + count;
> + strscpy(area->name, name);
> + /* Ensure above stores complete before we increase the count */
> + atomic_set_release(&nr_gcma_area, nr_area + 1);
> + } else {
> + ret = -ENOMEM;
> + }
> +
> + spin_unlock(&gcma_area_lock);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(gcma_register_area);
> +
> +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp)
> +{
> + unsigned long end_pfn = start_pfn + count;
> + struct gcma_area *area;
> + struct folio *folio;
> + int err, order = 0;
> +
> + gfp = current_gfp_context(gfp);
> + if (gfp & __GFP_COMP) {
> + if (!is_power_of_2(count))
> + return -EINVAL;
> +
> + order = ilog2(count);
> + if (order >= MAX_PAGE_ORDER)
> + return -EINVAL;
> + }
> +
> + area = find_area(start_pfn, end_pfn);
> + if (!area)
> + return -EINVAL;
> +
> + err = alloc_folio_range(area, start_pfn, end_pfn, gfp);
> + if (err)
> + return err;
> +
> + /*
> + * GCMA returns pages with refcount 1 and expects them to have
> + * the same refcount 1 when they are freed.
> + */
> + if (order) {
> + folio = pfn_folio(start_pfn);
> + set_page_count(&folio->page, 1);
> + prep_compound_page(&folio->page, order);
> + } else {
> + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
> + folio = pfn_folio(pfn);
> + set_page_count(&folio->page, 1);
> + }
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(gcma_alloc_range);
I'm wondering whether the rule of exporting symbols only for in-tree
modules that use them should be applied here or not, and why.
> +
> +int gcma_free_range(unsigned long start_pfn, unsigned long count)
> +{
> + unsigned long end_pfn = start_pfn + count;
> + struct gcma_area *area;
> + struct folio *folio;
> +
> + area = find_area(start_pfn, end_pfn);
> + if (!area)
> + return -EINVAL;
> +
> + folio = pfn_folio(start_pfn);
> + if (folio_test_large(folio)) {
> + int expected = folio_nr_pages(folio);
folio_nr_pages() returns 'unsigned long'. Would it be better to match the type?
> +
> + if (WARN(count != expected, "PFN %lu: count %lu != expected %d\n",
> + start_pfn, count, expected))
> + return -EINVAL;
> +
> + if (WARN(!folio_ref_dec_and_test(folio),
> + "PFN %lu: invalid folio refcount when freeing\n", start_pfn))
> + return -EINVAL;
> +
> + free_pages_prepare(&folio->page, folio_order(folio));
> + } else {
> + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
> + folio = pfn_folio(pfn);
> + if (folio_nr_pages(folio) == 1)
> + count--;
> +
> + if (WARN(!folio_ref_dec_and_test(folio),
> + "PFN %lu: invalid folio refcount when freeing\n", pfn))
> + return -EINVAL;
Don't we need to increase the previously decreased folio refcounts?
> +
> + free_pages_prepare(&folio->page, 0);
> + }
> + WARN(count != 0, "%lu pages are still in use!\n", count);
Is it OK to WARN() but not return an error here?
Also, why not warn earlier, above, when 'folio_nr_pages(folio) != 1'?
> + }
> +
> + return free_folio_range(area, start_pfn, end_pfn);
> +}
> +EXPORT_SYMBOL_GPL(gcma_free_range);
Like the gcma_alloc_range() case, I'm curious whether this symbol export
is intentional and whether the intention is explained somewhere.
> --
> 2.51.0.740.g6adb054d12-goog
Thanks,
SJ
On Fri, Oct 10, 2025 at 2:11 PM SeongJae Park <sj@kernel.org> wrote:
>
> Hello Suren,
>
> On Thu, 9 Oct 2025 18:19:50 -0700 Suren Baghdasaryan <surenb@google.com> wrote:
>
> > From: Minchan Kim <minchan@google.com>
> >
> > This patch introduces GCMA (Guaranteed Contiguous Memory Allocator), a
> > cleancache backend which reserves memory at boot and donates it to
> > cleancache for storing clean file-backed pages.
> > GCMA aims to guarantee contiguous memory allocation success as well as
> > low and deterministic allocation latency.
> >
> > Notes:
> > Originally, the idea was posted by SeongJae Park and Minchan Kim [1].
> > Later Minchan reworked it to be used in Android as a reference for
> > Android vendors to use [2].
> >
> > [1] https://lwn.net/Articles/619865/
> > [2] https://android-review.googlesource.com/q/topic:%22gcma_6.12%22
> >
> > Signed-off-by: Minchan Kim <minchan@google.com>
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> > MAINTAINERS | 2 +
> > include/linux/gcma.h | 36 +++++++
> > mm/Kconfig | 15 +++
> > mm/Makefile | 1 +
> > mm/gcma.c | 231 +++++++++++++++++++++++++++++++++++++++++++
> > 5 files changed, 285 insertions(+)
> > create mode 100644 include/linux/gcma.h
> > create mode 100644 mm/gcma.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 441e68c94177..95b5ad26ec11 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -16361,6 +16361,7 @@ F: Documentation/admin-guide/mm/
> > F: Documentation/mm/
> > F: include/linux/cma.h
> > F: include/linux/dmapool.h
> > +F: include/linux/gcma.h
> > F: include/linux/ioremap.h
> > F: include/linux/memory-tiers.h
> > F: include/linux/page_idle.h
> > @@ -16372,6 +16373,7 @@ F: mm/dmapool.c
> > F: mm/dmapool_test.c
> > F: mm/early_ioremap.c
> > F: mm/fadvise.c
> > +F: mm/gcma.c
> > F: mm/ioremap.c
> > F: mm/mapping_dirty_helpers.c
> > F: mm/memory-tiers.c
> > diff --git a/include/linux/gcma.h b/include/linux/gcma.h
> > new file mode 100644
> > index 000000000000..20b2c85de87b
> > --- /dev/null
> > +++ b/include/linux/gcma.h
> > @@ -0,0 +1,36 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __GCMA_H__
> > +#define __GCMA_H__
> > +
> > +#include <linux/types.h>
> > +
> > +#ifdef CONFIG_GCMA
> > +
> > +int gcma_register_area(const char *name,
> > + unsigned long start_pfn, unsigned long count);
> > +
> > +/*
> > + * NOTE: allocated pages are still marked reserved and when freeing them
> > + * the caller should ensure they are isolated and not referenced by anyone
> > + * other than the caller.
> > + */
> > +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp);
> > +int gcma_free_range(unsigned long start_pfn, unsigned long count);
> > +
> > +#else /* CONFIG_GCMA */
> > +
> > +static inline int gcma_register_area(const char *name,
> > + unsigned long start_pfn,
> > + unsigned long count)
> > + { return -EOPNOTSUPP; }
> > +static inline int gcma_alloc_range(unsigned long start_pfn,
> > + unsigned long count, gfp_t gfp)
> > + { return -EOPNOTSUPP; }
> > +
> > +static inline int gcma_free_range(unsigned long start_pfn,
> > + unsigned long count)
> > + { return -EOPNOTSUPP; }
> > +
> > +#endif /* CONFIG_GCMA */
> > +
> > +#endif /* __GCMA_H__ */
> > diff --git a/mm/Kconfig b/mm/Kconfig
> > index 9f4da8a848f4..41ce5ef8db55 100644
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -1013,6 +1013,21 @@ config CMA_AREAS
> >
> > If unsure, leave the default value "8" in UMA and "20" in NUMA.
> >
> > +config GCMA
> > + bool "GCMA (Guaranteed Contiguous Memory Allocator)"
> > + depends on CLEANCACHE
> > + help
> > + This enables the Guaranteed Contiguous Memory Allocator, which
> > + provides low latency, guaranteed contiguous memory allocations.
> > + Memory reserved by GCMA is donated to cleancache to be used as
> > + a pagecache extension. When a GCMA allocation is requested, the
> > + necessary pages are taken back from the cleancache and used to
> > + satisfy the request. GCMA guarantees low latency and successful
> > + allocation as long as the total size of GCMA allocations does
> > + not exceed the size of the memory donated to the cleancache.
> > +
> > + If unsure, say "N".
> > +
> > #
> > # Select this config option from the architecture Kconfig, if available, to set
> > # the max page order for physically contiguous allocations.
> > diff --git a/mm/Makefile b/mm/Makefile
> > index 845841a140e3..05aee66a8b07 100644
> > --- a/mm/Makefile
> > +++ b/mm/Makefile
> > @@ -149,3 +149,4 @@ obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
> > obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
> > obj-$(CONFIG_CLEANCACHE) += cleancache.o
> > obj-$(CONFIG_CLEANCACHE_SYSFS) += cleancache_sysfs.o
> > +obj-$(CONFIG_GCMA) += gcma.o
> > diff --git a/mm/gcma.c b/mm/gcma.c
> > new file mode 100644
> > index 000000000000..3ee0e1340db3
> > --- /dev/null
> > +++ b/mm/gcma.c
> > @@ -0,0 +1,231 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * GCMA (Guaranteed Contiguous Memory Allocator)
> > + *
> > + */
> > +
> > +#define pr_fmt(fmt) "gcma: " fmt
> > +
> > +#include <linux/cleancache.h>
> > +#include <linux/gcma.h>
> > +#include <linux/hashtable.h>
> > +#include <linux/highmem.h>
> > +#include <linux/idr.h>
> > +#include <linux/slab.h>
> > +#include <linux/xarray.h>
> > +#include "internal.h"
> > +
> > +#define MAX_GCMA_AREAS 64
> > +#define GCMA_AREA_NAME_MAX_LEN 32
> > +
> > +struct gcma_area {
> > + int pool_id;
> > + unsigned long start_pfn;
> > + unsigned long end_pfn;
> > + char name[GCMA_AREA_NAME_MAX_LEN];
> > +};
> > +
> > +static struct gcma_area areas[MAX_GCMA_AREAS];
> > +static atomic_t nr_gcma_area = ATOMIC_INIT(0);
> > +static DEFINE_SPINLOCK(gcma_area_lock);
> > +
> > +static int free_folio_range(struct gcma_area *area,
> > + unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > + unsigned long scanned = 0;
> > + struct folio *folio;
> > + unsigned long pfn;
> > +
> > + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> > + int err;
> > +
> > + if (!(++scanned % XA_CHECK_SCHED))
> > + cond_resched();
> > +
> > + folio = pfn_folio(pfn);
> > + err = cleancache_backend_put_folio(area->pool_id, folio);
>
> Why don't you use pfn_folio() directly, like alloc_folio_range() does?
Yes, that would be better. Will change.
>
> > + if (WARN(err, "PFN %lu: folio is still in use\n", pfn))
> > + return -EINVAL;
>
> Why don't you return err, like alloc_folio_range() does?
Ack. In my earlier version cleancache_backend_put_folio() was
returning bool, so I had to convert it to int here. But now we can
return err directly. Will change.
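Roughly like this (untested sketch with both changes folded in):

		err = cleancache_backend_put_folio(area->pool_id,
						   pfn_folio(pfn));
		if (err)
			return err;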
>
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int alloc_folio_range(struct gcma_area *area,
> > + unsigned long start_pfn, unsigned long end_pfn,
> > + gfp_t gfp)
> > +{
> > + unsigned long scanned = 0;
> > + unsigned long pfn;
> > +
> > + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> > + int err;
> > +
> > + if (!(++scanned % XA_CHECK_SCHED))
> > + cond_resched();
> > +
> > + err = cleancache_backend_get_folio(area->pool_id, pfn_folio(pfn));
> > + if (err) {
> > + free_folio_range(area, start_pfn, pfn);
> > + return err;
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static struct gcma_area *find_area(unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > + int nr_area = atomic_read_acquire(&nr_gcma_area);
> > + int i;
> > +
> > + for (i = 0; i < nr_area; i++) {
> > + struct gcma_area *area = &areas[i];
> > +
> > + if (area->end_pfn <= start_pfn)
> > + continue;
> > +
> > + if (area->start_pfn > end_pfn)
> > + continue;
> > +
> > + /* The entire range should belong to a single area */
> > + if (start_pfn < area->start_pfn || end_pfn > area->end_pfn)
> > + break;
> > +
> > + /* Found the area containing the entire range */
> > + return area;
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +int gcma_register_area(const char *name,
> > + unsigned long start_pfn, unsigned long count)
> > +{
> > + LIST_HEAD(folios);
> > + int i, pool_id;
> > + int nr_area;
> > + int ret = 0;
> > +
> > + pool_id = cleancache_backend_register_pool(name);
> > + if (pool_id < 0)
> > + return pool_id;
> > +
> > + for (i = 0; i < count; i++) {
> > + struct folio *folio;
> > +
> > + folio = pfn_folio(start_pfn + i);
> > + folio_clear_reserved(folio);
> > + folio_set_count(folio, 0);
> > + list_add(&folio->lru, &folios);
> > + }
> > +
> > + cleancache_backend_put_folios(pool_id, &folios);
> > +
> > + spin_lock(&gcma_area_lock);
> > +
> > + nr_area = atomic_read(&nr_gcma_area);
> > + if (nr_area < MAX_GCMA_AREAS) {
> > + struct gcma_area *area = &areas[nr_area];
> > +
> > + area->pool_id = pool_id;
> > + area->start_pfn = start_pfn;
> > + area->end_pfn = start_pfn + count;
> > + strscpy(area->name, name);
> > + /* Ensure above stores complete before we increase the count */
> > + atomic_set_release(&nr_gcma_area, nr_area + 1);
> > + } else {
> > + ret = -ENOMEM;
> > + }
> > +
> > + spin_unlock(&gcma_area_lock);
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(gcma_register_area);
> > +
> > +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp)
> > +{
> > + unsigned long end_pfn = start_pfn + count;
> > + struct gcma_area *area;
> > + struct folio *folio;
> > + int err, order = 0;
> > +
> > + gfp = current_gfp_context(gfp);
> > + if (gfp & __GFP_COMP) {
> > + if (!is_power_of_2(count))
> > + return -EINVAL;
> > +
> > + order = ilog2(count);
> > + if (order >= MAX_PAGE_ORDER)
> > + return -EINVAL;
> > + }
> > +
> > + area = find_area(start_pfn, end_pfn);
> > + if (!area)
> > + return -EINVAL;
> > +
> > + err = alloc_folio_range(area, start_pfn, end_pfn, gfp);
> > + if (err)
> > + return err;
> > +
> > + /*
> > + * GCMA returns pages with refcount 1 and expects them to have
> > + * the same refcount 1 when they are freed.
> > + */
> > + if (order) {
> > + folio = pfn_folio(start_pfn);
> > + set_page_count(&folio->page, 1);
> > + prep_compound_page(&folio->page, order);
> > + } else {
> > + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
> > + folio = pfn_folio(pfn);
> > + set_page_count(&folio->page, 1);
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(gcma_alloc_range);
>
> I'm wondering whether the rule of exporting symbols only for in-tree
> modules that use them should be applied here or not, and why.
In Android we use gcma_alloc_range() in vendor-defined dmabuf-heap
modules. That's why I need this API to be exported.
>
> > +
> > +int gcma_free_range(unsigned long start_pfn, unsigned long count)
> > +{
> > + unsigned long end_pfn = start_pfn + count;
> > + struct gcma_area *area;
> > + struct folio *folio;
> > +
> > + area = find_area(start_pfn, end_pfn);
> > + if (!area)
> > + return -EINVAL;
> > +
> > + folio = pfn_folio(start_pfn);
> > + if (folio_test_large(folio)) {
> > + int expected = folio_nr_pages(folio);
>
> folio_nr_pages() returns 'unsigned long'. Would it be better to match the type?
Yes! Ack.
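i.e. something like:

	unsigned long expected = folio_nr_pages(folio);

(with %d becoming %lu in the WARN() format for as long as it stays).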
>
> > +
> > + if (WARN(count != expected, "PFN %lu: count %lu != expected %d\n",
> > + start_pfn, count, expected))
> > + return -EINVAL;
> > +
> > + if (WARN(!folio_ref_dec_and_test(folio),
> > + "PFN %lu: invalid folio refcount when freeing\n", start_pfn))
> > + return -EINVAL;
> > +
> > + free_pages_prepare(&folio->page, folio_order(folio));
> > + } else {
> > + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) {
> > + folio = pfn_folio(pfn);
> > + if (folio_nr_pages(folio) == 1)
> > + count--;
> > +
> > + if (WARN(!folio_ref_dec_and_test(folio),
> > + "PFN %lu: invalid folio refcount when freeing\n", pfn))
> > + return -EINVAL;
>
> Don't we need to increase the previously decreased folio refcounts?
Yes, you are right. If any folio refcount is incorrect here (folio is
still in use), we should restore the refcount for all folios that we
have already processed. I think I'll also need to do 2 passes here:
first drop and check the refcount on all folios, then call
free_pages_prepare() if all folios are unused.
I also need to remove all these WARN()s, which I had for
debugging... Will remove them in the next version.
>
> > +
> > + free_pages_prepare(&folio->page, 0);
> > + }
> > + WARN(count != 0, "%lu pages are still in use!\n", count);
>
> Is it OK to WARN() but not return an error here?
No. I'll rework this loop to perform 2 passes as I mentioned before,
and if the first pass detects any problem, it will restore the
previous refcounts and return an error.
>
> Also, why not warn earlier, above, when 'folio_nr_pages(folio) != 1'?
I'll remove all these warnings and change the code to fail and restore
folios if any folio in the range does not meet our expectations.
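Roughly along these lines (untested sketch, ignoring the large-folio
accounting for brevity):

	unsigned long pfn;

	/* Pass 1: drop our reference on each folio; all must reach zero. */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		if (!folio_ref_dec_and_test(pfn_folio(pfn))) {
			/*
			 * Folio still in use: restore every refcount we
			 * dropped, including the one just decremented.
			 */
			do {
				folio_ref_inc(pfn_folio(pfn));
			} while (pfn-- > start_pfn);
			return -EBUSY;
		}
	}

	/* Pass 2: all folios are unused; prepare them for freeing. */
	for (pfn = start_pfn; pfn < end_pfn; pfn++)
		free_pages_prepare(&pfn_folio(pfn)->page, 0);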
>
> > + }
> > +
> > + return free_folio_range(area, start_pfn, end_pfn);
> > +}
> > +EXPORT_SYMBOL_GPL(gcma_free_range);
>
> Like the gcma_alloc_range() case, I'm curious whether this symbol export
> is intentional and whether the intention is explained somewhere.
Same reasons. Vendor-provided dmabuf heap modules might use it.
Thanks,
Suren.
>
> > --
> > 2.51.0.740.g6adb054d12-goog
>
>
> Thanks,
> SJ