[PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm

Posted by Lorenzo Stoakes 7 months, 3 weeks ago
Right now these are performed in kernel/fork.c which is odd and a violation
of separation of concerns, as well as preventing us from integrating this
and related logic into userland VMA testing going forward, and perhaps more
importantly - enabling us to, in a subsequent commit, make VMA
allocation/freeing a purely internal mm operation.

There is a fly in the ointment - nommu - mmap.c is not compiled if
CONFIG_MMU not set, and neither is vma.c.

To square the circle, let's add a new file - vma_init.c. This will be
compiled for both CONFIG_MMU and nommu builds, and will also form part of
the VMA userland testing.

This allows us to de-duplicate code, while maintaining separation of
concerns and the ability for us to userland test this logic.

Update the VMA userland tests accordingly, additionally adding a
detach_free_vma() helper function to correctly detach VMAs before freeing
them in test code, as this change was triggering the assert for this.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 MAINTAINERS                      |   1 +
 kernel/fork.c                    |  88 -------------------
 mm/Makefile                      |   2 +-
 mm/mmap.c                        |   3 +-
 mm/nommu.c                       |   4 +-
 mm/vma.h                         |   7 ++
 mm/vma_init.c                    | 101 ++++++++++++++++++++++
 tools/testing/vma/Makefile       |   2 +-
 tools/testing/vma/vma.c          |  26 ++++--
 tools/testing/vma/vma_internal.h | 143 +++++++++++++++++++++++++------
 10 files changed, 251 insertions(+), 126 deletions(-)
 create mode 100644 mm/vma_init.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 1ee1c22e6e36..d274e6802ba5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15656,6 +15656,7 @@ F:	mm/mseal.c
 F:	mm/vma.c
 F:	mm/vma.h
 F:	mm/vma_exec.c
+F:	mm/vma_init.c
 F:	mm/vma_internal.h
 F:	tools/testing/selftests/mm/merge.c
 F:	tools/testing/vma/
diff --git a/kernel/fork.c b/kernel/fork.c
index ac9f9267a473..9e4616dacd82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -431,88 +431,9 @@ struct kmem_cache *files_cachep;
 /* SLAB cache for fs_struct structures (tsk->fs) */
 struct kmem_cache *fs_cachep;
 
-/* SLAB cache for vm_area_struct structures */
-static struct kmem_cache *vm_area_cachep;
-
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma;
-
-	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (!vma)
-		return NULL;
-
-	vma_init(vma, mm);
-
-	return vma;
-}
-
-static void vm_area_init_from(const struct vm_area_struct *src,
-			      struct vm_area_struct *dest)
-{
-	dest->vm_mm = src->vm_mm;
-	dest->vm_ops = src->vm_ops;
-	dest->vm_start = src->vm_start;
-	dest->vm_end = src->vm_end;
-	dest->anon_vma = src->anon_vma;
-	dest->vm_pgoff = src->vm_pgoff;
-	dest->vm_file = src->vm_file;
-	dest->vm_private_data = src->vm_private_data;
-	vm_flags_init(dest, src->vm_flags);
-	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
-	       sizeof(dest->vm_page_prot));
-	/*
-	 * src->shared.rb may be modified concurrently when called from
-	 * dup_mmap(), but the clone will reinitialize it.
-	 */
-	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
-	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
-	       sizeof(dest->vm_userfaultfd_ctx));
-#ifdef CONFIG_ANON_VMA_NAME
-	dest->anon_name = src->anon_name;
-#endif
-#ifdef CONFIG_SWAP
-	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
-	       sizeof(dest->swap_readahead_info));
-#endif
-#ifndef CONFIG_MMU
-	dest->vm_region = src->vm_region;
-#endif
-#ifdef CONFIG_NUMA
-	dest->vm_policy = src->vm_policy;
-#endif
-}
-
-struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
-	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-
-	if (!new)
-		return NULL;
-
-	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
-	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
-	vm_area_init_from(orig, new);
-	vma_lock_init(new, true);
-	INIT_LIST_HEAD(&new->anon_vma_chain);
-	vma_numab_state_init(new);
-	dup_anon_vma_name(orig, new);
-
-	return new;
-}
-
-void vm_area_free(struct vm_area_struct *vma)
-{
-	/* The vma should be detached while being destroyed. */
-	vma_assert_detached(vma);
-	vma_numab_state_free(vma);
-	free_anon_vma_name(vma);
-	kmem_cache_free(vm_area_cachep, vma);
-}
-
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@ -3033,11 +2954,6 @@ void __init mm_cache_init(void)
 
 void __init proc_caches_init(void)
 {
-	struct kmem_cache_args args = {
-		.use_freeptr_offset = true,
-		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
-	};
-
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3054,10 +2970,6 @@ void __init proc_caches_init(void)
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
-	vm_area_cachep = kmem_cache_create("vm_area_struct",
-			sizeof(struct vm_area_struct), &args,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
-			SLAB_ACCOUNT);
 	mmap_init();
 	nsproxy_cache_init();
 }
diff --git a/mm/Makefile b/mm/Makefile
index 15a901bb431a..690ddcf7d9a1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -55,7 +55,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   mm_init.o percpu.o slab_common.o \
 			   compaction.o show_mem.o \
 			   interval_tree.o list_lru.o workingset.o \
-			   debug.o gup.o mmap_lock.o $(mmu-y)
+			   debug.o gup.o mmap_lock.o vma_init.o $(mmu-y)
 
 # Give 'page_alloc' its own module-parameter namespace
 page-alloc-y := page_alloc.o
diff --git a/mm/mmap.c b/mm/mmap.c
index 5259df031e15..81dd962a1cfc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1554,7 +1554,7 @@ static const struct ctl_table mmap_table[] = {
 #endif /* CONFIG_SYSCTL */
 
 /*
- * initialise the percpu counter for VM
+ * initialise the percpu counter for VM, initialise VMA state.
  */
 void __init mmap_init(void)
 {
@@ -1565,6 +1565,7 @@ void __init mmap_init(void)
 #ifdef CONFIG_SYSCTL
 	register_sysctl_init("vm", mmap_table);
 #endif
+	vma_state_init();
 }
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index a142fc258d39..0bf4849b8204 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -399,7 +399,8 @@ static const struct ctl_table nommu_table[] = {
 };
 
 /*
- * initialise the percpu counter for VM and region record slabs
+ * initialise the percpu counter for VM and region record slabs, initialise VMA
+ * state.
  */
 void __init mmap_init(void)
 {
@@ -409,6 +410,7 @@ void __init mmap_init(void)
 	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
 	register_sysctl_init("vm", nommu_table);
+	vma_state_init();
 }
 
 /*
diff --git a/mm/vma.h b/mm/vma.h
index 94307a2e4ab6..4a1e1768ca46 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -548,8 +548,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
 
 int __vm_munmap(unsigned long start, size_t len, bool unlock);
 
+
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);
 
+/* vma_init.h, shared between CONFIG_MMU and nommu. */
+void __init vma_state_init(void);
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm);
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
+void vm_area_free(struct vm_area_struct *vma);
+
 /* vma_exec.h */
 #ifdef CONFIG_MMU
 int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
diff --git a/mm/vma_init.c b/mm/vma_init.c
new file mode 100644
index 000000000000..967ca8517986
--- /dev/null
+++ b/mm/vma_init.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Functions for initialising, allocating, freeing and duplicating VMAs. Shared
+ * between CONFIG_MMU and non-CONFIG_MMU kernel configurations.
+ */
+
+#include "vma_internal.h"
+#include "vma.h"
+
+/* SLAB cache for vm_area_struct structures */
+static struct kmem_cache *vm_area_cachep;
+
+void __init vma_state_init(void)
+{
+	struct kmem_cache_args args = {
+		.use_freeptr_offset = true,
+		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+	};
+
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			sizeof(struct vm_area_struct), &args,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+			SLAB_ACCOUNT);
+}
+
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma)
+		return NULL;
+
+	vma_init(vma, mm);
+
+	return vma;
+}
+
+static void vm_area_init_from(const struct vm_area_struct *src,
+			      struct vm_area_struct *dest)
+{
+	dest->vm_mm = src->vm_mm;
+	dest->vm_ops = src->vm_ops;
+	dest->vm_start = src->vm_start;
+	dest->vm_end = src->vm_end;
+	dest->anon_vma = src->anon_vma;
+	dest->vm_pgoff = src->vm_pgoff;
+	dest->vm_file = src->vm_file;
+	dest->vm_private_data = src->vm_private_data;
+	vm_flags_init(dest, src->vm_flags);
+	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
+	       sizeof(dest->vm_page_prot));
+	/*
+	 * src->shared.rb may be modified concurrently when called from
+	 * dup_mmap(), but the clone will reinitialize it.
+	 */
+	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
+	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
+	       sizeof(dest->vm_userfaultfd_ctx));
+#ifdef CONFIG_ANON_VMA_NAME
+	dest->anon_name = src->anon_name;
+#endif
+#ifdef CONFIG_SWAP
+	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
+	       sizeof(dest->swap_readahead_info));
+#endif
+#ifndef CONFIG_MMU
+	dest->vm_region = src->vm_region;
+#endif
+#ifdef CONFIG_NUMA
+	dest->vm_policy = src->vm_policy;
+#endif
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+	if (!new)
+		return NULL;
+
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+	vm_area_init_from(orig, new);
+	vma_lock_init(new, true);
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+	vma_numab_state_init(new);
+	dup_anon_vma_name(orig, new);
+
+	return new;
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+	/* The vma should be detached while being destroyed. */
+	vma_assert_detached(vma);
+	vma_numab_state_free(vma);
+	free_anon_vma_name(vma);
+	kmem_cache_free(vm_area_cachep, vma);
+}
diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
index 624040fcf193..66f3831a668f 100644
--- a/tools/testing/vma/Makefile
+++ b/tools/testing/vma/Makefile
@@ -9,7 +9,7 @@ include ../shared/shared.mk
 OFILES = $(SHARED_OFILES) vma.o maple-shim.o
 TARGETS = vma
 
-vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
+vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
 
 vma:	$(OFILES)
 	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 5832ae5d797d..2be7597a2ac2 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
  * Directly import the VMA implementation here. Our vma_internal.h wrapper
  * provides userland-equivalent functionality for everything vma.c uses.
  */
+#include "../../../mm/vma_init.c"
 #include "../../../mm/vma_exec.c"
 #include "../../../mm/vma.c"
 
@@ -91,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 	return res;
 }
 
+static void detach_free_vma(struct vm_area_struct *vma)
+{
+	vma_mark_detached(vma);
+	vm_area_free(vma);
+}
+
 /* Helper function to allocate a VMA and link it to the tree. */
 static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
 						 unsigned long start,
@@ -104,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
 		return NULL;
 
 	if (attach_vma(mm, vma)) {
-		vm_area_free(vma);
+		detach_free_vma(vma);
 		return NULL;
 	}
 
@@ -249,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
 
 	vma_iter_set(vmi, 0);
 	for_each_vma(*vmi, vma) {
-		vm_area_free(vma);
+		detach_free_vma(vma);
 		count++;
 	}
 
@@ -319,7 +326,7 @@ static bool test_simple_merge(void)
 	ASSERT_EQ(vma->vm_pgoff, 0);
 	ASSERT_EQ(vma->vm_flags, flags);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	mtree_destroy(&mm.mm_mt);
 
 	return true;
@@ -361,7 +368,7 @@ static bool test_simple_modify(void)
 	ASSERT_EQ(vma->vm_end, 0x1000);
 	ASSERT_EQ(vma->vm_pgoff, 0);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	vma_iter_clear(&vmi);
 
 	vma = vma_next(&vmi);
@@ -370,7 +377,7 @@ static bool test_simple_modify(void)
 	ASSERT_EQ(vma->vm_end, 0x2000);
 	ASSERT_EQ(vma->vm_pgoff, 1);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	vma_iter_clear(&vmi);
 
 	vma = vma_next(&vmi);
@@ -379,7 +386,7 @@ static bool test_simple_modify(void)
 	ASSERT_EQ(vma->vm_end, 0x3000);
 	ASSERT_EQ(vma->vm_pgoff, 2);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	mtree_destroy(&mm.mm_mt);
 
 	return true;
@@ -407,7 +414,7 @@ static bool test_simple_expand(void)
 	ASSERT_EQ(vma->vm_end, 0x3000);
 	ASSERT_EQ(vma->vm_pgoff, 0);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	mtree_destroy(&mm.mm_mt);
 
 	return true;
@@ -428,7 +435,7 @@ static bool test_simple_shrink(void)
 	ASSERT_EQ(vma->vm_end, 0x1000);
 	ASSERT_EQ(vma->vm_pgoff, 0);
 
-	vm_area_free(vma);
+	detach_free_vma(vma);
 	mtree_destroy(&mm.mm_mt);
 
 	return true;
@@ -619,7 +626,7 @@ static bool test_merge_new(void)
 		ASSERT_EQ(vma->vm_pgoff, 0);
 		ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
 
-		vm_area_free(vma);
+		detach_free_vma(vma);
 		count++;
 	}
 
@@ -1668,6 +1675,7 @@ int main(void)
 	int num_tests = 0, num_fail = 0;
 
 	maple_tree_init();
+	vma_state_init();
 
 #define TEST(name)							\
 	do {								\
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 32e990313158..198abe66de5a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -155,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
  */
 #define pr_warn_once pr_err
 
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
+
 struct kref {
 	refcount_t refcount;
 };
@@ -255,6 +259,8 @@ struct file {
 
 #define VMA_LOCK_OFFSET	0x40000000
 
+typedef struct { unsigned long v; } freeptr_t;
+
 struct vm_area_struct {
 	/* The first cache line has the info for VMA tree walking. */
 
@@ -264,9 +270,7 @@ struct vm_area_struct {
 			unsigned long vm_start;
 			unsigned long vm_end;
 		};
-#ifdef CONFIG_PER_VMA_LOCK
-		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
-#endif
+		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
 	};
 
 	struct mm_struct *vm_mm;	/* The address space we belong to. */
@@ -463,6 +467,65 @@ struct pagetable_move_control {
 		.len_in = len_,						\
 	}
 
+struct kmem_cache_args {
+	/**
+	 * @align: The required alignment for the objects.
+	 *
+	 * %0 means no specific alignment is requested.
+	 */
+	unsigned int align;
+	/**
+	 * @useroffset: Usercopy region offset.
+	 *
+	 * %0 is a valid offset, when @usersize is non-%0
+	 */
+	unsigned int useroffset;
+	/**
+	 * @usersize: Usercopy region size.
+	 *
+	 * %0 means no usercopy region is specified.
+	 */
+	unsigned int usersize;
+	/**
+	 * @freeptr_offset: Custom offset for the free pointer
+	 * in &SLAB_TYPESAFE_BY_RCU caches
+	 *
+	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+	 * outside of the object. This might cause the object to grow in size.
+	 * Cache creators that have a reason to avoid this can specify a custom
+	 * free pointer offset in their struct where the free pointer will be
+	 * placed.
+	 *
+	 * Note that placing the free pointer inside the object requires the
+	 * caller to ensure that no fields are invalidated that are required to
+	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+	 * details).
+	 *
+	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+	 * is specified, %use_freeptr_offset must be set %true.
+	 *
+	 * Note that @ctor currently isn't supported with custom free pointers
+	 * as a @ctor requires an external free pointer.
+	 */
+	unsigned int freeptr_offset;
+	/**
+	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
+	 */
+	bool use_freeptr_offset;
+	/**
+	 * @ctor: A constructor for the objects.
+	 *
+	 * The constructor is invoked for each object in a newly allocated slab
+	 * page. It is the cache user's responsibility to free object in the
+	 * same state as after calling the constructor, or deal appropriately
+	 * with any differences between a freshly constructed and a reallocated
+	 * object.
+	 *
+	 * %NULL means no constructor.
+	 */
+	void (*ctor)(void *);
+};
+
 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
 {
 	mas_pause(&vmi->mas);
@@ -547,31 +610,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_lock_seq = UINT_MAX;
 }
 
-static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
+struct kmem_cache {
+	const char *name;
+	size_t object_size;
+	struct kmem_cache_args *args;
+};
 
-	if (!vma)
-		return NULL;
+static inline struct kmem_cache *__kmem_cache_create(const char *name,
+						     size_t object_size,
+						     struct kmem_cache_args *args)
+{
+	struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
 
-	vma_init(vma, mm);
+	ret->name = name;
+	ret->object_size = object_size;
+	ret->args = args;
 
-	return vma;
+	return ret;
 }
 
-static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
-	struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
+#define kmem_cache_create(__name, __object_size, __args, ...)           \
+	__kmem_cache_create((__name), (__object_size), (__args))
 
-	if (!new)
-		return NULL;
+static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+	(void)gfpflags;
 
-	memcpy(new, orig, sizeof(*new));
-	refcount_set(&new->vm_refcnt, 0);
-	new->vm_lock_seq = UINT_MAX;
-	INIT_LIST_HEAD(&new->anon_vma_chain);
+	return calloc(s->object_size, 1);
+}
 
-	return new;
+static inline void kmem_cache_free(struct kmem_cache *s, void *x)
+{
+	free(x);
 }
 
 /*
@@ -738,11 +808,6 @@ static inline void mpol_put(struct mempolicy *)
 {
 }
 
-static inline void vm_area_free(struct vm_area_struct *vma)
-{
-	free(vma);
-}
-
 static inline void lru_add_drain(void)
 {
 }
@@ -1312,4 +1377,32 @@ static inline void ksm_exit(struct mm_struct *mm)
 	(void)mm;
 }
 
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+	(void)vma;
+	(void)reset_refcnt;
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+	(void)vma;
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+	(void)vma;
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+				     struct vm_area_struct *new_vma)
+{
+	(void)orig_vma;
+	(void)new_vma;
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+	(void)vma;
+}
+
 #endif	/* __MM_VMA_INTERNAL_H */
-- 
2.49.0
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Pedro Falcato 7 months, 3 weeks ago
On Mon, Apr 28, 2025 at 04:28:17PM +0100, Lorenzo Stoakes wrote:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
> 
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
> 
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
> 
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
> 
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Pedro Falcato <pfalcato@suse.de>

-- 
Pedro
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by David Hildenbrand 7 months, 3 weeks ago
On 28.04.25 17:28, Lorenzo Stoakes wrote:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
> 
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
> 
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
> 
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
> 
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Cheers,

David / dhildenb
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Kees Cook 7 months, 3 weeks ago
On Mon, Apr 28, 2025 at 04:28:17PM +0100, Lorenzo Stoakes wrote:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
> 
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
> 
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
> 
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
> 
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Kees Cook <kees@kernel.org>

-- 
Kees Cook
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Suren Baghdasaryan 7 months, 3 weeks ago
On Mon, Apr 28, 2025 at 8:31 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
>
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
>
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
>
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
>
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.

Great! We are getting closer to parity between tests and the kernel code.

>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

> ---
>  MAINTAINERS                      |   1 +
>  kernel/fork.c                    |  88 -------------------
>  mm/Makefile                      |   2 +-
>  mm/mmap.c                        |   3 +-
>  mm/nommu.c                       |   4 +-
>  mm/vma.h                         |   7 ++
>  mm/vma_init.c                    | 101 ++++++++++++++++++++++
>  tools/testing/vma/Makefile       |   2 +-
>  tools/testing/vma/vma.c          |  26 ++++--
>  tools/testing/vma/vma_internal.h | 143 +++++++++++++++++++++++++------
>  10 files changed, 251 insertions(+), 126 deletions(-)
>  create mode 100644 mm/vma_init.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 1ee1c22e6e36..d274e6802ba5 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15656,6 +15656,7 @@ F:      mm/mseal.c
>  F:     mm/vma.c
>  F:     mm/vma.h
>  F:     mm/vma_exec.c
> +F:     mm/vma_init.c
>  F:     mm/vma_internal.h
>  F:     tools/testing/selftests/mm/merge.c
>  F:     tools/testing/vma/
> diff --git a/kernel/fork.c b/kernel/fork.c
> index ac9f9267a473..9e4616dacd82 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -431,88 +431,9 @@ struct kmem_cache *files_cachep;
>  /* SLAB cache for fs_struct structures (tsk->fs) */
>  struct kmem_cache *fs_cachep;
>
> -/* SLAB cache for vm_area_struct structures */
> -static struct kmem_cache *vm_area_cachep;
> -
>  /* SLAB cache for mm_struct structures (tsk->mm) */
>  static struct kmem_cache *mm_cachep;

Maybe at some point we will be able to move mm_cachep out of here as well?

>
> -struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -       struct vm_area_struct *vma;
> -
> -       vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -       if (!vma)
> -               return NULL;
> -
> -       vma_init(vma, mm);
> -
> -       return vma;
> -}
> -
> -static void vm_area_init_from(const struct vm_area_struct *src,
> -                             struct vm_area_struct *dest)
> -{
> -       dest->vm_mm = src->vm_mm;
> -       dest->vm_ops = src->vm_ops;
> -       dest->vm_start = src->vm_start;
> -       dest->vm_end = src->vm_end;
> -       dest->anon_vma = src->anon_vma;
> -       dest->vm_pgoff = src->vm_pgoff;
> -       dest->vm_file = src->vm_file;
> -       dest->vm_private_data = src->vm_private_data;
> -       vm_flags_init(dest, src->vm_flags);
> -       memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> -              sizeof(dest->vm_page_prot));
> -       /*
> -        * src->shared.rb may be modified concurrently when called from
> -        * dup_mmap(), but the clone will reinitialize it.
> -        */
> -       data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> -       memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> -              sizeof(dest->vm_userfaultfd_ctx));
> -#ifdef CONFIG_ANON_VMA_NAME
> -       dest->anon_name = src->anon_name;
> -#endif
> -#ifdef CONFIG_SWAP
> -       memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> -              sizeof(dest->swap_readahead_info));
> -#endif
> -#ifndef CONFIG_MMU
> -       dest->vm_region = src->vm_region;
> -#endif
> -#ifdef CONFIG_NUMA
> -       dest->vm_policy = src->vm_policy;
> -#endif
> -}
> -
> -struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -       struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -
> -       if (!new)
> -               return NULL;
> -
> -       ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> -       ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> -       vm_area_init_from(orig, new);
> -       vma_lock_init(new, true);
> -       INIT_LIST_HEAD(&new->anon_vma_chain);
> -       vma_numab_state_init(new);
> -       dup_anon_vma_name(orig, new);
> -
> -       return new;
> -}
> -
> -void vm_area_free(struct vm_area_struct *vma)
> -{
> -       /* The vma should be detached while being destroyed. */
> -       vma_assert_detached(vma);
> -       vma_numab_state_free(vma);
> -       free_anon_vma_name(vma);
> -       kmem_cache_free(vm_area_cachep, vma);
> -}
> -
>  static void account_kernel_stack(struct task_struct *tsk, int account)
>  {
>         if (IS_ENABLED(CONFIG_VMAP_STACK)) {
> @@ -3033,11 +2954,6 @@ void __init mm_cache_init(void)
>
>  void __init proc_caches_init(void)
>  {
> -       struct kmem_cache_args args = {
> -               .use_freeptr_offset = true,
> -               .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> -       };
> -
>         sighand_cachep = kmem_cache_create("sighand_cache",
>                         sizeof(struct sighand_struct), 0,
>                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> @@ -3054,10 +2970,6 @@ void __init proc_caches_init(void)
>                         sizeof(struct fs_struct), 0,
>                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
>                         NULL);
> -       vm_area_cachep = kmem_cache_create("vm_area_struct",
> -                       sizeof(struct vm_area_struct), &args,
> -                       SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> -                       SLAB_ACCOUNT);
>         mmap_init();
>         nsproxy_cache_init();
>  }
> diff --git a/mm/Makefile b/mm/Makefile
> index 15a901bb431a..690ddcf7d9a1 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -55,7 +55,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
>                            mm_init.o percpu.o slab_common.o \
>                            compaction.o show_mem.o \
>                            interval_tree.o list_lru.o workingset.o \
> -                          debug.o gup.o mmap_lock.o $(mmu-y)
> +                          debug.o gup.o mmap_lock.o vma_init.o $(mmu-y)
>
>  # Give 'page_alloc' its own module-parameter namespace
>  page-alloc-y := page_alloc.o
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5259df031e15..81dd962a1cfc 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1554,7 +1554,7 @@ static const struct ctl_table mmap_table[] = {
>  #endif /* CONFIG_SYSCTL */
>
>  /*
> - * initialise the percpu counter for VM
> + * initialise the percpu counter for VM, initialise VMA state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -1565,6 +1565,7 @@ void __init mmap_init(void)
>  #ifdef CONFIG_SYSCTL
>         register_sysctl_init("vm", mmap_table);
>  #endif
> +       vma_state_init();
>  }
>
>  /*
> diff --git a/mm/nommu.c b/mm/nommu.c
> index a142fc258d39..0bf4849b8204 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -399,7 +399,8 @@ static const struct ctl_table nommu_table[] = {
>  };
>
>  /*
> - * initialise the percpu counter for VM and region record slabs
> + * initialise the percpu counter for VM and region record slabs, initialise VMA
> + * state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -409,6 +410,7 @@ void __init mmap_init(void)
>         VM_BUG_ON(ret);
>         vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
>         register_sysctl_init("vm", nommu_table);
> +       vma_state_init();
>  }
>
>  /*
> diff --git a/mm/vma.h b/mm/vma.h
> index 94307a2e4ab6..4a1e1768ca46 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -548,8 +548,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
>
>  int __vm_munmap(unsigned long start, size_t len, bool unlock);
>
> +
>  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);
>
> +/* vma_init.h, shared between CONFIG_MMU and nommu. */
> +void __init vma_state_init(void);
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm);
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
> +void vm_area_free(struct vm_area_struct *vma);
> +
>  /* vma_exec.h */
>  #ifdef CONFIG_MMU
>  int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
> diff --git a/mm/vma_init.c b/mm/vma_init.c
> new file mode 100644
> index 000000000000..967ca8517986
> --- /dev/null
> +++ b/mm/vma_init.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/*
> + * Functions for initialising, allocating, freeing and duplicating VMAs. Shared
> + * between CONFIG_MMU and non-CONFIG_MMU kernel configurations.
> + */
> +
> +#include "vma_internal.h"
> +#include "vma.h"
> +
> +/* SLAB cache for vm_area_struct structures */
> +static struct kmem_cache *vm_area_cachep;
> +
> +void __init vma_state_init(void)
> +{
> +       struct kmem_cache_args args = {
> +               .use_freeptr_offset = true,
> +               .freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> +       };
> +
> +       vm_area_cachep = kmem_cache_create("vm_area_struct",
> +                       sizeof(struct vm_area_struct), &args,
> +                       SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> +                       SLAB_ACCOUNT);
> +}
> +
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> +{
> +       struct vm_area_struct *vma;
> +
> +       vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +       if (!vma)
> +               return NULL;
> +
> +       vma_init(vma, mm);
> +
> +       return vma;
> +}
> +
> +static void vm_area_init_from(const struct vm_area_struct *src,
> +                             struct vm_area_struct *dest)
> +{
> +       dest->vm_mm = src->vm_mm;
> +       dest->vm_ops = src->vm_ops;
> +       dest->vm_start = src->vm_start;
> +       dest->vm_end = src->vm_end;
> +       dest->anon_vma = src->anon_vma;
> +       dest->vm_pgoff = src->vm_pgoff;
> +       dest->vm_file = src->vm_file;
> +       dest->vm_private_data = src->vm_private_data;
> +       vm_flags_init(dest, src->vm_flags);
> +       memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> +              sizeof(dest->vm_page_prot));
> +       /*
> +        * src->shared.rb may be modified concurrently when called from
> +        * dup_mmap(), but the clone will reinitialize it.
> +        */
> +       data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> +       memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> +              sizeof(dest->vm_userfaultfd_ctx));
> +#ifdef CONFIG_ANON_VMA_NAME
> +       dest->anon_name = src->anon_name;
> +#endif
> +#ifdef CONFIG_SWAP
> +       memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> +              sizeof(dest->swap_readahead_info));
> +#endif
> +#ifndef CONFIG_MMU
> +       dest->vm_region = src->vm_region;
> +#endif
> +#ifdef CONFIG_NUMA
> +       dest->vm_policy = src->vm_policy;
> +#endif
> +}
> +
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> +{
> +       struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +
> +       if (!new)
> +               return NULL;
> +
> +       ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> +       ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> +       vm_area_init_from(orig, new);
> +       vma_lock_init(new, true);
> +       INIT_LIST_HEAD(&new->anon_vma_chain);
> +       vma_numab_state_init(new);
> +       dup_anon_vma_name(orig, new);
> +
> +       return new;
> +}
> +
> +void vm_area_free(struct vm_area_struct *vma)
> +{
> +       /* The vma should be detached while being destroyed. */
> +       vma_assert_detached(vma);
> +       vma_numab_state_free(vma);
> +       free_anon_vma_name(vma);
> +       kmem_cache_free(vm_area_cachep, vma);
> +}
> diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
> index 624040fcf193..66f3831a668f 100644
> --- a/tools/testing/vma/Makefile
> +++ b/tools/testing/vma/Makefile
> @@ -9,7 +9,7 @@ include ../shared/shared.mk
>  OFILES = $(SHARED_OFILES) vma.o maple-shim.o
>  TARGETS = vma
>
> -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
> +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
>
>  vma:   $(OFILES)
>         $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index 5832ae5d797d..2be7597a2ac2 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
>   * Directly import the VMA implementation here. Our vma_internal.h wrapper
>   * provides userland-equivalent functionality for everything vma.c uses.
>   */
> +#include "../../../mm/vma_init.c"
>  #include "../../../mm/vma_exec.c"
>  #include "../../../mm/vma.c"
>
> @@ -91,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
>         return res;
>  }
>
> +static void detach_free_vma(struct vm_area_struct *vma)

In case you respin another version, I think this change related to
detach_free_vma() would be better done as a separate patch. But I'm
totally fine with the way it is now, so don't respin just for that.

> +{
> +       vma_mark_detached(vma);
> +       vm_area_free(vma);
> +}
> +
>  /* Helper function to allocate a VMA and link it to the tree. */
>  static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>                                                  unsigned long start,
> @@ -104,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>                 return NULL;
>
>         if (attach_vma(mm, vma)) {
> -               vm_area_free(vma);
> +               detach_free_vma(vma);
>                 return NULL;
>         }
>
> @@ -249,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
>
>         vma_iter_set(vmi, 0);
>         for_each_vma(*vmi, vma) {
> -               vm_area_free(vma);
> +               detach_free_vma(vma);
>                 count++;
>         }
>
> @@ -319,7 +326,7 @@ static bool test_simple_merge(void)
>         ASSERT_EQ(vma->vm_pgoff, 0);
>         ASSERT_EQ(vma->vm_flags, flags);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         mtree_destroy(&mm.mm_mt);
>
>         return true;
> @@ -361,7 +368,7 @@ static bool test_simple_modify(void)
>         ASSERT_EQ(vma->vm_end, 0x1000);
>         ASSERT_EQ(vma->vm_pgoff, 0);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         vma_iter_clear(&vmi);
>
>         vma = vma_next(&vmi);
> @@ -370,7 +377,7 @@ static bool test_simple_modify(void)
>         ASSERT_EQ(vma->vm_end, 0x2000);
>         ASSERT_EQ(vma->vm_pgoff, 1);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         vma_iter_clear(&vmi);
>
>         vma = vma_next(&vmi);
> @@ -379,7 +386,7 @@ static bool test_simple_modify(void)
>         ASSERT_EQ(vma->vm_end, 0x3000);
>         ASSERT_EQ(vma->vm_pgoff, 2);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         mtree_destroy(&mm.mm_mt);
>
>         return true;
> @@ -407,7 +414,7 @@ static bool test_simple_expand(void)
>         ASSERT_EQ(vma->vm_end, 0x3000);
>         ASSERT_EQ(vma->vm_pgoff, 0);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         mtree_destroy(&mm.mm_mt);
>
>         return true;
> @@ -428,7 +435,7 @@ static bool test_simple_shrink(void)
>         ASSERT_EQ(vma->vm_end, 0x1000);
>         ASSERT_EQ(vma->vm_pgoff, 0);
>
> -       vm_area_free(vma);
> +       detach_free_vma(vma);
>         mtree_destroy(&mm.mm_mt);
>
>         return true;
> @@ -619,7 +626,7 @@ static bool test_merge_new(void)
>                 ASSERT_EQ(vma->vm_pgoff, 0);
>                 ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
>
> -               vm_area_free(vma);
> +               detach_free_vma(vma);
>                 count++;
>         }
>
> @@ -1668,6 +1675,7 @@ int main(void)
>         int num_tests = 0, num_fail = 0;
>
>         maple_tree_init();
> +       vma_state_init();
>
>  #define TEST(name)                                                     \
>         do {                                                            \
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 32e990313158..198abe66de5a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -155,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
>   */
>  #define pr_warn_once pr_err
>
> +#define data_race(expr) expr
> +
> +#define ASSERT_EXCLUSIVE_WRITER(x)
> +
>  struct kref {
>         refcount_t refcount;
>  };
> @@ -255,6 +259,8 @@ struct file {
>
>  #define VMA_LOCK_OFFSET        0x40000000
>
> +typedef struct { unsigned long v; } freeptr_t;
> +
>  struct vm_area_struct {
>         /* The first cache line has the info for VMA tree walking. */
>
> @@ -264,9 +270,7 @@ struct vm_area_struct {
>                         unsigned long vm_start;
>                         unsigned long vm_end;
>                 };
> -#ifdef CONFIG_PER_VMA_LOCK
> -               struct rcu_head vm_rcu; /* Used for deferred freeing. */
> -#endif
> +               freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */

Oops, I must have missed this when adding SLAB_TYPESAFE_BY_RCU to the
vm_area_struct cache. Thanks for fixing it! (See the freeptr layout
sketch at the end of this message.)

>         };
>
>         struct mm_struct *vm_mm;        /* The address space we belong to. */
> @@ -463,6 +467,65 @@ struct pagetable_move_control {
>                 .len_in = len_,                                         \
>         }
>
> +struct kmem_cache_args {
> +       /**
> +        * @align: The required alignment for the objects.
> +        *
> +        * %0 means no specific alignment is requested.
> +        */
> +       unsigned int align;
> +       /**
> +        * @useroffset: Usercopy region offset.
> +        *
> +        * %0 is a valid offset, when @usersize is non-%0
> +        */
> +       unsigned int useroffset;
> +       /**
> +        * @usersize: Usercopy region size.
> +        *
> +        * %0 means no usercopy region is specified.
> +        */
> +       unsigned int usersize;
> +       /**
> +        * @freeptr_offset: Custom offset for the free pointer
> +        * in &SLAB_TYPESAFE_BY_RCU caches
> +        *
> +        * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
> +        * outside of the object. This might cause the object to grow in size.
> +        * Cache creators that have a reason to avoid this can specify a custom
> +        * free pointer offset in their struct where the free pointer will be
> +        * placed.
> +        *
> +        * Note that placing the free pointer inside the object requires the
> +        * caller to ensure that no fields are invalidated that are required to
> +        * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
> +        * details).
> +        *
> +        * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
> +        * is specified, %use_freeptr_offset must be set %true.
> +        *
> +        * Note that @ctor currently isn't supported with custom free pointers
> +        * as a @ctor requires an external free pointer.
> +        */
> +       unsigned int freeptr_offset;
> +       /**
> +        * @use_freeptr_offset: Whether a @freeptr_offset is used.
> +        */
> +       bool use_freeptr_offset;
> +       /**
> +        * @ctor: A constructor for the objects.
> +        *
> +        * The constructor is invoked for each object in a newly allocated slab
> +        * page. It is the cache user's responsibility to free object in the
> +        * same state as after calling the constructor, or deal appropriately
> +        * with any differences between a freshly constructed and a reallocated
> +        * object.
> +        *
> +        * %NULL means no constructor.
> +        */
> +       void (*ctor)(void *);
> +};
> +
>  static inline void vma_iter_invalidate(struct vma_iterator *vmi)
>  {
>         mas_pause(&vmi->mas);
> @@ -547,31 +610,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>         vma->vm_lock_seq = UINT_MAX;
>  }
>
> -static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -       struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
> +struct kmem_cache {
> +       const char *name;
> +       size_t object_size;
> +       struct kmem_cache_args *args;
> +};
>
> -       if (!vma)
> -               return NULL;
> +static inline struct kmem_cache *__kmem_cache_create(const char *name,
> +                                                    size_t object_size,
> +                                                    struct kmem_cache_args *args)
> +{
> +       struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
>
> -       vma_init(vma, mm);
> +       ret->name = name;
> +       ret->object_size = object_size;
> +       ret->args = args;
>
> -       return vma;
> +       return ret;
>  }
>
> -static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -       struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
> +#define kmem_cache_create(__name, __object_size, __args, ...)           \
> +       __kmem_cache_create((__name), (__object_size), (__args))
>
> -       if (!new)
> -               return NULL;
> +static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
> +{
> +       (void)gfpflags;
>
> -       memcpy(new, orig, sizeof(*new));
> -       refcount_set(&new->vm_refcnt, 0);
> -       new->vm_lock_seq = UINT_MAX;
> -       INIT_LIST_HEAD(&new->anon_vma_chain);
> +       return calloc(s->object_size, 1);
> +}
>
> -       return new;
> +static inline void kmem_cache_free(struct kmem_cache *s, void *x)
> +{
> +       free(x);
>  }
>
>  /*
> @@ -738,11 +808,6 @@ static inline void mpol_put(struct mempolicy *)
>  {
>  }
>
> -static inline void vm_area_free(struct vm_area_struct *vma)
> -{
> -       free(vma);
> -}
> -
>  static inline void lru_add_drain(void)
>  {
>  }
> @@ -1312,4 +1377,32 @@ static inline void ksm_exit(struct mm_struct *mm)
>         (void)mm;
>  }
>
> +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
> +{
> +       (void)vma;
> +       (void)reset_refcnt;
> +}
> +
> +static inline void vma_numab_state_init(struct vm_area_struct *vma)
> +{
> +       (void)vma;
> +}
> +
> +static inline void vma_numab_state_free(struct vm_area_struct *vma)
> +{
> +       (void)vma;
> +}
> +
> +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
> +                                    struct vm_area_struct *new_vma)
> +{
> +       (void)orig_vma;
> +       (void)new_vma;
> +}
> +
> +static inline void free_anon_vma_name(struct vm_area_struct *vma)
> +{
> +       (void)vma;
> +}
> +
>  #endif /* __MM_VMA_INTERNAL_H */
> --
> 2.49.0
>
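For context on the freeptr_t change Suren highlights above: with
SLAB_TYPESAFE_BY_RCU plus a custom freeptr_offset, the slab allocator
keeps its freelist pointer inside the object, overlaying fields that are
dead once the object is freed, so the cache needs no extra per-object
space. A minimal standalone sketch of the union trick (hypothetical
struct obj, not kernel code; freeptr_t mirrors the test stub above):

	#include <stdio.h>
	#include <stddef.h>

	typedef struct { unsigned long v; } freeptr_t;

	/* Hypothetical object using the same layout trick as
	 * vm_area_struct: while allocated, the union holds live fields;
	 * while on the slab freelist, the same bytes hold the free
	 * pointer. */
	struct obj {
		union {
			struct {
				unsigned long start;
				unsigned long end;
			};
			freeptr_t freeptr;	/* valid only when freed */
		};
	};

	int main(void)
	{
		/* A cache for this object would pass
		 * .use_freeptr_offset = true and
		 * .freeptr_offset = offsetof(struct obj, freeptr). */
		printf("freeptr_offset = %zu\n", offsetof(struct obj, freeptr));
		return 0;
	}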
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Vlastimil Babka 7 months, 3 weeks ago
On 4/28/25 17:28, Lorenzo Stoakes wrote:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.

I wonder if the last part is from an earlier version and now obsolete,
because there's no subsequent commit in this series and the placement of
alloc/freeing in vma_init.c seems to make those purely internal mm
operations already? Or do you mean some further plans?

> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
> 
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
> 
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
> 
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
On Tue, Apr 29, 2025 at 09:22:59AM +0200, Vlastimil Babka wrote:
> On 4/28/25 17:28, Lorenzo Stoakes wrote:
> > Right now these are performed in kernel/fork.c which is odd and a violation
> > of separation of concerns, as well as preventing us from integrating this
> > and related logic into userland VMA testing going forward, and perhaps more
> > importantly - enabling us to, in a subsequent commit, make VMA
> > allocation/freeing a purely internal mm operation.
>
> I wonder if the last part is from an earlier version and now obsolete,
> because there's no subsequent commit in this series and the placement of
> alloc/freeing in vma_init.c seems to make those purely internal mm
> operations already? Or do you mean some further plans?
>

Sorry, missed this!

Andrew - could we delete the last part of this sentence so it reads:

Right now these are performed in kernel/fork.c which is odd and a violation
of separation of concerns, as well as preventing us from integrating this
and related logic into userland VMA testing going forward.

Thanks!
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Andrew Morton 7 months, 3 weeks ago
On Wed, 30 Apr 2025 10:20:10 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:

> On Tue, Apr 29, 2025 at 09:22:59AM +0200, Vlastimil Babka wrote:
> > On 4/28/25 17:28, Lorenzo Stoakes wrote:
> > > Right now these are performed in kernel/fork.c which is odd and a violation
> > > of separation of concerns, as well as preventing us from integrating this
> > > and related logic into userland VMA testing going forward, and perhaps more
> > > importantly - enabling us to, in a subsequent commit, make VMA
> > > allocation/freeing a purely internal mm operation.
> >
> > I wonder if the last part is from an earlier version and now obsolete,
> > because there's no subsequent commit in this series and the placement of
> > alloc/freeing in vma_init.c seems to make those purely internal mm
> > operations already? Or do you mean some further plans?
> >
> 
> Sorry, missed this!
> 
> Andrew - could we delete the last part of this sentence so it reads:
> 
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward.

Sure.  The result:

: Right now these are performed in kernel/fork.c which is odd and a
: violation of separation of concerns, as well as preventing us from
: integrating this and related logic into userland VMA testing going
: forward.
: 
: There is a fly in the ointment - nommu - mmap.c is not compiled if
: CONFIG_MMU not set, and neither is vma.c.
: 
: To square the circle, let's add a new file - vma_init.c.  This will be
: compiled for both CONFIG_MMU and nommu builds, and will also form part of
: the VMA userland testing.
: 
: This allows us to de-duplicate code, while maintaining separation of
: concerns and the ability for us to userland test this logic.
: 
: Update the VMA userland tests accordingly, additionally adding a
: detach_free_vma() helper function to correctly detach VMAs before freeing
: them in test code, as this change was triggering the assert for this.
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
On Wed, Apr 30, 2025 at 02:42:36PM -0700, Andrew Morton wrote:
> On Wed, 30 Apr 2025 10:20:10 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:
>
> > On Tue, Apr 29, 2025 at 09:22:59AM +0200, Vlastimil Babka wrote:
> > > On 4/28/25 17:28, Lorenzo Stoakes wrote:
> > > > Right now these are performed in kernel/fork.c which is odd and a violation
> > > > of separation of concerns, as well as preventing us from integrating this
> > > > and related logic into userland VMA testing going forward, and perhaps more
> > > > importantly - enabling us to, in a subsequent commit, make VMA
> > > > allocation/freeing a purely internal mm operation.
> > >
> > > I wonder if the last part is from an earlier version and now obsolete,
> > > because there's no subsequent commit in this series and the placement of
> > > alloc/freeing in vma_init.c seems to make those purely internal mm
> > > operations already? Or do you mean some further plans?
> > >
> >
> > Sorry, missed this!
> >
> > Andrew - could we delete the last part of this sentence so it reads:
> >
> > Right now these are performed in kernel/fork.c which is odd and a violation
> > of separation of concerns, as well as preventing us from integrating this
> > and related logic into userland VMA testing going forward.
>
> Sure.  The result:
>
> : Right now these are performed in kernel/fork.c which is odd and a
> : violation of separation of concerns, as well as preventing us from
> : integrating this and related logic into userland VMA testing going
> : forward.
> :
> : There is a fly in the ointment - nommu - mmap.c is not compiled if
> : CONFIG_MMU not set, and neither is vma.c.
> :
> : To square the circle, let's add a new file - vma_init.c.  This will be
> : compiled for both CONFIG_MMU and nommu builds, and will also form part of
> : the VMA userland testing.
> :
> : This allows us to de-duplicate code, while maintaining separation of
> : concerns and the ability for us to userland test this logic.
> :
> : Update the VMA userland tests accordingly, additionally adding a
> : detach_free_vma() helper function to correctly detach VMAs before freeing
> : them in test code, as this change was triggering the assert for this.
>

Perfect, thanks!
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
Andrew - I managed to typo vma_init.c to vma_init.h in the comment added to
mm/vma.h (at least I was consistent in these silly typos...).

Would you prefer a fix patch or is it ok for you to quickly fix that up?

Thanks!
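
For reference, the fixup would presumably be the one-liner below, against
the comment added to mm/vma.h:

	-/* vma_init.h, shared between CONFIG_MMU and nommu. */
	+/* vma_init.c, shared between CONFIG_MMU and nommu. */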

On Mon, Apr 28, 2025 at 04:28:17PM +0100, Lorenzo Stoakes wrote:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
>
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
>
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
>
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
>
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
>  MAINTAINERS                      |   1 +
>  kernel/fork.c                    |  88 -------------------
>  mm/Makefile                      |   2 +-
>  mm/mmap.c                        |   3 +-
>  mm/nommu.c                       |   4 +-
>  mm/vma.h                         |   7 ++
>  mm/vma_init.c                    | 101 ++++++++++++++++++++++
>  tools/testing/vma/Makefile       |   2 +-
>  tools/testing/vma/vma.c          |  26 ++++--
>  tools/testing/vma/vma_internal.h | 143 +++++++++++++++++++++++++------
>  10 files changed, 251 insertions(+), 126 deletions(-)
>  create mode 100644 mm/vma_init.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 1ee1c22e6e36..d274e6802ba5 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15656,6 +15656,7 @@ F:	mm/mseal.c
>  F:	mm/vma.c
>  F:	mm/vma.h
>  F:	mm/vma_exec.c
> +F:	mm/vma_init.c
>  F:	mm/vma_internal.h
>  F:	tools/testing/selftests/mm/merge.c
>  F:	tools/testing/vma/
> diff --git a/kernel/fork.c b/kernel/fork.c
> index ac9f9267a473..9e4616dacd82 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -431,88 +431,9 @@ struct kmem_cache *files_cachep;
>  /* SLAB cache for fs_struct structures (tsk->fs) */
>  struct kmem_cache *fs_cachep;
>
> -/* SLAB cache for vm_area_struct structures */
> -static struct kmem_cache *vm_area_cachep;
> -
>  /* SLAB cache for mm_struct structures (tsk->mm) */
>  static struct kmem_cache *mm_cachep;
>
> -struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -	struct vm_area_struct *vma;
> -
> -	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -	if (!vma)
> -		return NULL;
> -
> -	vma_init(vma, mm);
> -
> -	return vma;
> -}
> -
> -static void vm_area_init_from(const struct vm_area_struct *src,
> -			      struct vm_area_struct *dest)
> -{
> -	dest->vm_mm = src->vm_mm;
> -	dest->vm_ops = src->vm_ops;
> -	dest->vm_start = src->vm_start;
> -	dest->vm_end = src->vm_end;
> -	dest->anon_vma = src->anon_vma;
> -	dest->vm_pgoff = src->vm_pgoff;
> -	dest->vm_file = src->vm_file;
> -	dest->vm_private_data = src->vm_private_data;
> -	vm_flags_init(dest, src->vm_flags);
> -	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> -	       sizeof(dest->vm_page_prot));
> -	/*
> -	 * src->shared.rb may be modified concurrently when called from
> -	 * dup_mmap(), but the clone will reinitialize it.
> -	 */
> -	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> -	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> -	       sizeof(dest->vm_userfaultfd_ctx));
> -#ifdef CONFIG_ANON_VMA_NAME
> -	dest->anon_name = src->anon_name;
> -#endif
> -#ifdef CONFIG_SWAP
> -	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> -	       sizeof(dest->swap_readahead_info));
> -#endif
> -#ifndef CONFIG_MMU
> -	dest->vm_region = src->vm_region;
> -#endif
> -#ifdef CONFIG_NUMA
> -	dest->vm_policy = src->vm_policy;
> -#endif
> -}
> -
> -struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -
> -	if (!new)
> -		return NULL;
> -
> -	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> -	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> -	vm_area_init_from(orig, new);
> -	vma_lock_init(new, true);
> -	INIT_LIST_HEAD(&new->anon_vma_chain);
> -	vma_numab_state_init(new);
> -	dup_anon_vma_name(orig, new);
> -
> -	return new;
> -}
> -
> -void vm_area_free(struct vm_area_struct *vma)
> -{
> -	/* The vma should be detached while being destroyed. */
> -	vma_assert_detached(vma);
> -	vma_numab_state_free(vma);
> -	free_anon_vma_name(vma);
> -	kmem_cache_free(vm_area_cachep, vma);
> -}
> -
>  static void account_kernel_stack(struct task_struct *tsk, int account)
>  {
>  	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
> @@ -3033,11 +2954,6 @@ void __init mm_cache_init(void)
>
>  void __init proc_caches_init(void)
>  {
> -	struct kmem_cache_args args = {
> -		.use_freeptr_offset = true,
> -		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> -	};
> -
>  	sighand_cachep = kmem_cache_create("sighand_cache",
>  			sizeof(struct sighand_struct), 0,
>  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> @@ -3054,10 +2970,6 @@ void __init proc_caches_init(void)
>  			sizeof(struct fs_struct), 0,
>  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
>  			NULL);
> -	vm_area_cachep = kmem_cache_create("vm_area_struct",
> -			sizeof(struct vm_area_struct), &args,
> -			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> -			SLAB_ACCOUNT);
>  	mmap_init();
>  	nsproxy_cache_init();
>  }
> diff --git a/mm/Makefile b/mm/Makefile
> index 15a901bb431a..690ddcf7d9a1 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -55,7 +55,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
>  			   mm_init.o percpu.o slab_common.o \
>  			   compaction.o show_mem.o \
>  			   interval_tree.o list_lru.o workingset.o \
> -			   debug.o gup.o mmap_lock.o $(mmu-y)
> +			   debug.o gup.o mmap_lock.o vma_init.o $(mmu-y)
>
>  # Give 'page_alloc' its own module-parameter namespace
>  page-alloc-y := page_alloc.o
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5259df031e15..81dd962a1cfc 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1554,7 +1554,7 @@ static const struct ctl_table mmap_table[] = {
>  #endif /* CONFIG_SYSCTL */
>
>  /*
> - * initialise the percpu counter for VM
> + * initialise the percpu counter for VM, initialise VMA state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -1565,6 +1565,7 @@ void __init mmap_init(void)
>  #ifdef CONFIG_SYSCTL
>  	register_sysctl_init("vm", mmap_table);
>  #endif
> +	vma_state_init();
>  }
>
>  /*
> diff --git a/mm/nommu.c b/mm/nommu.c
> index a142fc258d39..0bf4849b8204 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -399,7 +399,8 @@ static const struct ctl_table nommu_table[] = {
>  };
>
>  /*
> - * initialise the percpu counter for VM and region record slabs
> + * initialise the percpu counter for VM and region record slabs, initialise VMA
> + * state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -409,6 +410,7 @@ void __init mmap_init(void)
>  	VM_BUG_ON(ret);
>  	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
>  	register_sysctl_init("vm", nommu_table);
> +	vma_state_init();
>  }
>
>  /*
> diff --git a/mm/vma.h b/mm/vma.h
> index 94307a2e4ab6..4a1e1768ca46 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -548,8 +548,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
>
>  int __vm_munmap(unsigned long start, size_t len, bool unlock);
>
> +
>  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);
>
> +/* vma_init.h, shared between CONFIG_MMU and nommu. */
> +void __init vma_state_init(void);
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm);
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
> +void vm_area_free(struct vm_area_struct *vma);
> +
>  /* vma_exec.h */
>  #ifdef CONFIG_MMU
>  int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
> diff --git a/mm/vma_init.c b/mm/vma_init.c
> new file mode 100644
> index 000000000000..967ca8517986
> --- /dev/null
> +++ b/mm/vma_init.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/*
> + * Functions for initialising, allocating, freeing and duplicating VMAs. Shared
> + * between CONFIG_MMU and non-CONFIG_MMU kernel configurations.
> + */
> +
> +#include "vma_internal.h"
> +#include "vma.h"
> +
> +/* SLAB cache for vm_area_struct structures */
> +static struct kmem_cache *vm_area_cachep;
> +
> +void __init vma_state_init(void)
> +{
> +	struct kmem_cache_args args = {
> +		.use_freeptr_offset = true,
> +		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> +	};
> +
> +	vm_area_cachep = kmem_cache_create("vm_area_struct",
> +			sizeof(struct vm_area_struct), &args,
> +			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> +			SLAB_ACCOUNT);
> +}
> +
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> +{
> +	struct vm_area_struct *vma;
> +
> +	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +	if (!vma)
> +		return NULL;
> +
> +	vma_init(vma, mm);
> +
> +	return vma;
> +}
> +
> +static void vm_area_init_from(const struct vm_area_struct *src,
> +			      struct vm_area_struct *dest)
> +{
> +	dest->vm_mm = src->vm_mm;
> +	dest->vm_ops = src->vm_ops;
> +	dest->vm_start = src->vm_start;
> +	dest->vm_end = src->vm_end;
> +	dest->anon_vma = src->anon_vma;
> +	dest->vm_pgoff = src->vm_pgoff;
> +	dest->vm_file = src->vm_file;
> +	dest->vm_private_data = src->vm_private_data;
> +	vm_flags_init(dest, src->vm_flags);
> +	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> +	       sizeof(dest->vm_page_prot));
> +	/*
> +	 * src->shared.rb may be modified concurrently when called from
> +	 * dup_mmap(), but the clone will reinitialize it.
> +	 */
> +	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> +	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> +	       sizeof(dest->vm_userfaultfd_ctx));
> +#ifdef CONFIG_ANON_VMA_NAME
> +	dest->anon_name = src->anon_name;
> +#endif
> +#ifdef CONFIG_SWAP
> +	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> +	       sizeof(dest->swap_readahead_info));
> +#endif
> +#ifndef CONFIG_MMU
> +	dest->vm_region = src->vm_region;
> +#endif
> +#ifdef CONFIG_NUMA
> +	dest->vm_policy = src->vm_policy;
> +#endif
> +}
> +
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> +{
> +	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +
> +	if (!new)
> +		return NULL;
> +
> +	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> +	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> +	vm_area_init_from(orig, new);
> +	vma_lock_init(new, true);
> +	INIT_LIST_HEAD(&new->anon_vma_chain);
> +	vma_numab_state_init(new);
> +	dup_anon_vma_name(orig, new);
> +
> +	return new;
> +}
> +
> +void vm_area_free(struct vm_area_struct *vma)
> +{
> +	/* The vma should be detached while being destroyed. */
> +	vma_assert_detached(vma);
> +	vma_numab_state_free(vma);
> +	free_anon_vma_name(vma);
> +	kmem_cache_free(vm_area_cachep, vma);
> +}
> diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
> index 624040fcf193..66f3831a668f 100644
> --- a/tools/testing/vma/Makefile
> +++ b/tools/testing/vma/Makefile
> @@ -9,7 +9,7 @@ include ../shared/shared.mk
>  OFILES = $(SHARED_OFILES) vma.o maple-shim.o
>  TARGETS = vma
>
> -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
> +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
>
>  vma:	$(OFILES)
>  	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index 5832ae5d797d..2be7597a2ac2 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
>   * Directly import the VMA implementation here. Our vma_internal.h wrapper
>   * provides userland-equivalent functionality for everything vma.c uses.
>   */
> +#include "../../../mm/vma_init.c"
>  #include "../../../mm/vma_exec.c"
>  #include "../../../mm/vma.c"
>
> @@ -91,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
>  	return res;
>  }
>
> +static void detach_free_vma(struct vm_area_struct *vma)
> +{
> +	vma_mark_detached(vma);
> +	vm_area_free(vma);
> +}
> +
>  /* Helper function to allocate a VMA and link it to the tree. */
>  static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>  						 unsigned long start,
> @@ -104,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>  		return NULL;
>
>  	if (attach_vma(mm, vma)) {
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		return NULL;
>  	}
>
> @@ -249,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
>
>  	vma_iter_set(vmi, 0);
>  	for_each_vma(*vmi, vma) {
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		count++;
>  	}
>
> @@ -319,7 +326,7 @@ static bool test_simple_merge(void)
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>  	ASSERT_EQ(vma->vm_flags, flags);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>
>  	return true;
> @@ -361,7 +368,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x1000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	vma_iter_clear(&vmi);
>
>  	vma = vma_next(&vmi);
> @@ -370,7 +377,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x2000);
>  	ASSERT_EQ(vma->vm_pgoff, 1);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	vma_iter_clear(&vmi);
>
>  	vma = vma_next(&vmi);
> @@ -379,7 +386,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x3000);
>  	ASSERT_EQ(vma->vm_pgoff, 2);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>
>  	return true;
> @@ -407,7 +414,7 @@ static bool test_simple_expand(void)
>  	ASSERT_EQ(vma->vm_end, 0x3000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>
>  	return true;
> @@ -428,7 +435,7 @@ static bool test_simple_shrink(void)
>  	ASSERT_EQ(vma->vm_end, 0x1000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>
>  	return true;
> @@ -619,7 +626,7 @@ static bool test_merge_new(void)
>  		ASSERT_EQ(vma->vm_pgoff, 0);
>  		ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
>
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		count++;
>  	}
>
> @@ -1668,6 +1675,7 @@ int main(void)
>  	int num_tests = 0, num_fail = 0;
>
>  	maple_tree_init();
> +	vma_state_init();
>
>  #define TEST(name)							\
>  	do {								\
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 32e990313158..198abe66de5a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -155,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
>   */
>  #define pr_warn_once pr_err
>
> +#define data_race(expr) expr
> +
> +#define ASSERT_EXCLUSIVE_WRITER(x)
> +
>  struct kref {
>  	refcount_t refcount;
>  };
> @@ -255,6 +259,8 @@ struct file {
>
>  #define VMA_LOCK_OFFSET	0x40000000
>
> +typedef struct { unsigned long v; } freeptr_t;
> +
>  struct vm_area_struct {
>  	/* The first cache line has the info for VMA tree walking. */
>
> @@ -264,9 +270,7 @@ struct vm_area_struct {
>  			unsigned long vm_start;
>  			unsigned long vm_end;
>  		};
> -#ifdef CONFIG_PER_VMA_LOCK
> -		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
> -#endif
> +		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
>  	};
>
>  	struct mm_struct *vm_mm;	/* The address space we belong to. */
> @@ -463,6 +467,65 @@ struct pagetable_move_control {
>  		.len_in = len_,						\
>  	}
>
> +struct kmem_cache_args {
> +	/**
> +	 * @align: The required alignment for the objects.
> +	 *
> +	 * %0 means no specific alignment is requested.
> +	 */
> +	unsigned int align;
> +	/**
> +	 * @useroffset: Usercopy region offset.
> +	 *
> +	 * %0 is a valid offset, when @usersize is non-%0
> +	 */
> +	unsigned int useroffset;
> +	/**
> +	 * @usersize: Usercopy region size.
> +	 *
> +	 * %0 means no usercopy region is specified.
> +	 */
> +	unsigned int usersize;
> +	/**
> +	 * @freeptr_offset: Custom offset for the free pointer
> +	 * in &SLAB_TYPESAFE_BY_RCU caches
> +	 *
> +	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
> +	 * outside of the object. This might cause the object to grow in size.
> +	 * Cache creators that have a reason to avoid this can specify a custom
> +	 * free pointer offset in their struct where the free pointer will be
> +	 * placed.
> +	 *
> +	 * Note that placing the free pointer inside the object requires the
> +	 * caller to ensure that no fields are invalidated that are required to
> +	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
> +	 * details).
> +	 *
> +	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
> +	 * is specified, %use_freeptr_offset must be set %true.
> +	 *
> +	 * Note that @ctor currently isn't supported with custom free pointers
> +	 * as a @ctor requires an external free pointer.
> +	 */
> +	unsigned int freeptr_offset;
> +	/**
> +	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
> +	 */
> +	bool use_freeptr_offset;
> +	/**
> +	 * @ctor: A constructor for the objects.
> +	 *
> +	 * The constructor is invoked for each object in a newly allocated slab
> +	 * page. It is the cache user's responsibility to free object in the
> +	 * same state as after calling the constructor, or deal appropriately
> +	 * with any differences between a freshly constructed and a reallocated
> +	 * object.
> +	 *
> +	 * %NULL means no constructor.
> +	 */
> +	void (*ctor)(void *);
> +};
> +
>  static inline void vma_iter_invalidate(struct vma_iterator *vmi)
>  {
>  	mas_pause(&vmi->mas);
> @@ -547,31 +610,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>  	vma->vm_lock_seq = UINT_MAX;
>  }
>
> -static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -	struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
> +struct kmem_cache {
> +	const char *name;
> +	size_t object_size;
> +	struct kmem_cache_args *args;
> +};
>
> -	if (!vma)
> -		return NULL;
> +static inline struct kmem_cache *__kmem_cache_create(const char *name,
> +						     size_t object_size,
> +						     struct kmem_cache_args *args)
> +{
> +	struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
>
> -	vma_init(vma, mm);
> +	ret->name = name;
> +	ret->object_size = object_size;
> +	ret->args = args;
>
> -	return vma;
> +	return ret;
>  }
>
> -static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -	struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
> +#define kmem_cache_create(__name, __object_size, __args, ...)           \
> +	__kmem_cache_create((__name), (__object_size), (__args))
>
> -	if (!new)
> -		return NULL;
> +static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
> +{
> +	(void)gfpflags;
>
> -	memcpy(new, orig, sizeof(*new));
> -	refcount_set(&new->vm_refcnt, 0);
> -	new->vm_lock_seq = UINT_MAX;
> -	INIT_LIST_HEAD(&new->anon_vma_chain);
> +	return calloc(s->object_size, 1);
> +}
>
> -	return new;
> +static inline void kmem_cache_free(struct kmem_cache *s, void *x)
> +{
> +	free(x);
>  }
>
>  /*
> @@ -738,11 +808,6 @@ static inline void mpol_put(struct mempolicy *)
>  {
>  }
>
> -static inline void vm_area_free(struct vm_area_struct *vma)
> -{
> -	free(vma);
> -}
> -
>  static inline void lru_add_drain(void)
>  {
>  }
> @@ -1312,4 +1377,32 @@ static inline void ksm_exit(struct mm_struct *mm)
>  	(void)mm;
>  }
>
> +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
> +{
> +	(void)vma;
> +	(void)reset_refcnt;
> +}
> +
> +static inline void vma_numab_state_init(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
> +static inline void vma_numab_state_free(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
> +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
> +				     struct vm_area_struct *new_vma)
> +{
> +	(void)orig_vma;
> +	(void)new_vma;
> +}
> +
> +static inline void free_anon_vma_name(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
>  #endif	/* __MM_VMA_INTERNAL_H */
> --
> 2.49.0
>
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Liam R. Howlett 7 months, 3 weeks ago
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250428 11:28]:
> Right now these are performed in kernel/fork.c which is odd and a violation
> of separation of concerns, as well as preventing us from integrating this
> and related logic into userland VMA testing going forward, and perhaps more
> importantly - enabling us to, in a subsequent commit, make VMA
> allocation/freeing a purely internal mm operation.
> 
> There is a fly in the ointment - nommu - mmap.c is not compiled if
> CONFIG_MMU not set, and neither is vma.c.
> 
> To square the circle, let's add a new file - vma_init.c. This will be
> compiled for both CONFIG_MMU and nommu builds, and will also form part of
> the VMA userland testing.
> 
> This allows us to de-duplicate code, while maintaining separation of
> concerns and the ability for us to userland test this logic.
> 
> Update the VMA userland tests accordingly, additionally adding a
> detach_free_vma() helper function to correctly detach VMAs before freeing
> them in test code, as this change was triggering the assert for this.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

One small nit below.

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  MAINTAINERS                      |   1 +
>  kernel/fork.c                    |  88 -------------------
>  mm/Makefile                      |   2 +-
>  mm/mmap.c                        |   3 +-
>  mm/nommu.c                       |   4 +-
>  mm/vma.h                         |   7 ++
>  mm/vma_init.c                    | 101 ++++++++++++++++++++++
>  tools/testing/vma/Makefile       |   2 +-
>  tools/testing/vma/vma.c          |  26 ++++--
>  tools/testing/vma/vma_internal.h | 143 +++++++++++++++++++++++++------
>  10 files changed, 251 insertions(+), 126 deletions(-)
>  create mode 100644 mm/vma_init.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 1ee1c22e6e36..d274e6802ba5 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15656,6 +15656,7 @@ F:	mm/mseal.c
>  F:	mm/vma.c
>  F:	mm/vma.h
>  F:	mm/vma_exec.c
> +F:	mm/vma_init.c
>  F:	mm/vma_internal.h
>  F:	tools/testing/selftests/mm/merge.c
>  F:	tools/testing/vma/
> diff --git a/kernel/fork.c b/kernel/fork.c
> index ac9f9267a473..9e4616dacd82 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -431,88 +431,9 @@ struct kmem_cache *files_cachep;
>  /* SLAB cache for fs_struct structures (tsk->fs) */
>  struct kmem_cache *fs_cachep;
>  
> -/* SLAB cache for vm_area_struct structures */
> -static struct kmem_cache *vm_area_cachep;
> -
>  /* SLAB cache for mm_struct structures (tsk->mm) */
>  static struct kmem_cache *mm_cachep;
>  
> -struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -	struct vm_area_struct *vma;
> -
> -	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -	if (!vma)
> -		return NULL;
> -
> -	vma_init(vma, mm);
> -
> -	return vma;
> -}
> -
> -static void vm_area_init_from(const struct vm_area_struct *src,
> -			      struct vm_area_struct *dest)
> -{
> -	dest->vm_mm = src->vm_mm;
> -	dest->vm_ops = src->vm_ops;
> -	dest->vm_start = src->vm_start;
> -	dest->vm_end = src->vm_end;
> -	dest->anon_vma = src->anon_vma;
> -	dest->vm_pgoff = src->vm_pgoff;
> -	dest->vm_file = src->vm_file;
> -	dest->vm_private_data = src->vm_private_data;
> -	vm_flags_init(dest, src->vm_flags);
> -	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> -	       sizeof(dest->vm_page_prot));
> -	/*
> -	 * src->shared.rb may be modified concurrently when called from
> -	 * dup_mmap(), but the clone will reinitialize it.
> -	 */
> -	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> -	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> -	       sizeof(dest->vm_userfaultfd_ctx));
> -#ifdef CONFIG_ANON_VMA_NAME
> -	dest->anon_name = src->anon_name;
> -#endif
> -#ifdef CONFIG_SWAP
> -	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> -	       sizeof(dest->swap_readahead_info));
> -#endif
> -#ifndef CONFIG_MMU
> -	dest->vm_region = src->vm_region;
> -#endif
> -#ifdef CONFIG_NUMA
> -	dest->vm_policy = src->vm_policy;
> -#endif
> -}
> -
> -struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -
> -	if (!new)
> -		return NULL;
> -
> -	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> -	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> -	vm_area_init_from(orig, new);
> -	vma_lock_init(new, true);
> -	INIT_LIST_HEAD(&new->anon_vma_chain);
> -	vma_numab_state_init(new);
> -	dup_anon_vma_name(orig, new);
> -
> -	return new;
> -}
> -
> -void vm_area_free(struct vm_area_struct *vma)
> -{
> -	/* The vma should be detached while being destroyed. */
> -	vma_assert_detached(vma);
> -	vma_numab_state_free(vma);
> -	free_anon_vma_name(vma);
> -	kmem_cache_free(vm_area_cachep, vma);
> -}
> -
>  static void account_kernel_stack(struct task_struct *tsk, int account)
>  {
>  	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
> @@ -3033,11 +2954,6 @@ void __init mm_cache_init(void)
>  
>  void __init proc_caches_init(void)
>  {
> -	struct kmem_cache_args args = {
> -		.use_freeptr_offset = true,
> -		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> -	};
> -
>  	sighand_cachep = kmem_cache_create("sighand_cache",
>  			sizeof(struct sighand_struct), 0,
>  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> @@ -3054,10 +2970,6 @@ void __init proc_caches_init(void)
>  			sizeof(struct fs_struct), 0,
>  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
>  			NULL);
> -	vm_area_cachep = kmem_cache_create("vm_area_struct",
> -			sizeof(struct vm_area_struct), &args,
> -			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> -			SLAB_ACCOUNT);
>  	mmap_init();
>  	nsproxy_cache_init();
>  }
> diff --git a/mm/Makefile b/mm/Makefile
> index 15a901bb431a..690ddcf7d9a1 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -55,7 +55,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
>  			   mm_init.o percpu.o slab_common.o \
>  			   compaction.o show_mem.o \
>  			   interval_tree.o list_lru.o workingset.o \
> -			   debug.o gup.o mmap_lock.o $(mmu-y)
> +			   debug.o gup.o mmap_lock.o vma_init.o $(mmu-y)
>  
>  # Give 'page_alloc' its own module-parameter namespace
>  page-alloc-y := page_alloc.o
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5259df031e15..81dd962a1cfc 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1554,7 +1554,7 @@ static const struct ctl_table mmap_table[] = {
>  #endif /* CONFIG_SYSCTL */
>  
>  /*
> - * initialise the percpu counter for VM
> + * initialise the percpu counter for VM, initialise VMA state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -1565,6 +1565,7 @@ void __init mmap_init(void)
>  #ifdef CONFIG_SYSCTL
>  	register_sysctl_init("vm", mmap_table);
>  #endif
> +	vma_state_init();
>  }
>  
>  /*
> diff --git a/mm/nommu.c b/mm/nommu.c
> index a142fc258d39..0bf4849b8204 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -399,7 +399,8 @@ static const struct ctl_table nommu_table[] = {
>  };
>  
>  /*
> - * initialise the percpu counter for VM and region record slabs
> + * initialise the percpu counter for VM and region record slabs, initialise VMA
> + * state.
>   */
>  void __init mmap_init(void)
>  {
> @@ -409,6 +410,7 @@ void __init mmap_init(void)
>  	VM_BUG_ON(ret);
>  	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
>  	register_sysctl_init("vm", nommu_table);
> +	vma_state_init();
>  }
>  
>  /*
> diff --git a/mm/vma.h b/mm/vma.h
> index 94307a2e4ab6..4a1e1768ca46 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -548,8 +548,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
>  
>  int __vm_munmap(unsigned long start, size_t len, bool unlock);
>  
> +

Accidental extra line here?

>  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);
>  
> +/* vma_init.h, shared between CONFIG_MMU and nommu. */
> +void __init vma_state_init(void);
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm);
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
> +void vm_area_free(struct vm_area_struct *vma);
> +
>  /* vma_exec.h */
>  #ifdef CONFIG_MMU
>  int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
> diff --git a/mm/vma_init.c b/mm/vma_init.c
> new file mode 100644
> index 000000000000..967ca8517986
> --- /dev/null
> +++ b/mm/vma_init.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/*
> > + * Functions for initialising, allocating, freeing and duplicating VMAs. Shared
> + * between CONFIG_MMU and non-CONFIG_MMU kernel configurations.
> + */
> +
> +#include "vma_internal.h"
> +#include "vma.h"
> +
> +/* SLAB cache for vm_area_struct structures */
> +static struct kmem_cache *vm_area_cachep;
> +
> +void __init vma_state_init(void)
> +{
> +	struct kmem_cache_args args = {
> +		.use_freeptr_offset = true,
> +		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> +	};
> +
> +	vm_area_cachep = kmem_cache_create("vm_area_struct",
> +			sizeof(struct vm_area_struct), &args,
> +			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> +			SLAB_ACCOUNT);
> +}
> +
> +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> +{
> +	struct vm_area_struct *vma;
> +
> +	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +	if (!vma)
> +		return NULL;
> +
> +	vma_init(vma, mm);
> +
> +	return vma;
> +}
> +
> +static void vm_area_init_from(const struct vm_area_struct *src,
> +			      struct vm_area_struct *dest)
> +{
> +	dest->vm_mm = src->vm_mm;
> +	dest->vm_ops = src->vm_ops;
> +	dest->vm_start = src->vm_start;
> +	dest->vm_end = src->vm_end;
> +	dest->anon_vma = src->anon_vma;
> +	dest->vm_pgoff = src->vm_pgoff;
> +	dest->vm_file = src->vm_file;
> +	dest->vm_private_data = src->vm_private_data;
> +	vm_flags_init(dest, src->vm_flags);
> +	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> +	       sizeof(dest->vm_page_prot));
> +	/*
> +	 * src->shared.rb may be modified concurrently when called from
> +	 * dup_mmap(), but the clone will reinitialize it.
> +	 */
> +	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> +	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> +	       sizeof(dest->vm_userfaultfd_ctx));
> +#ifdef CONFIG_ANON_VMA_NAME
> +	dest->anon_name = src->anon_name;
> +#endif
> +#ifdef CONFIG_SWAP
> +	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> +	       sizeof(dest->swap_readahead_info));
> +#endif
> +#ifndef CONFIG_MMU
> +	dest->vm_region = src->vm_region;
> +#endif
> +#ifdef CONFIG_NUMA
> +	dest->vm_policy = src->vm_policy;
> +#endif
> +}
> +
> +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> +{
> +	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +
> +	if (!new)
> +		return NULL;
> +
> +	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> +	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> +	vm_area_init_from(orig, new);
> +	vma_lock_init(new, true);
> +	INIT_LIST_HEAD(&new->anon_vma_chain);
> +	vma_numab_state_init(new);
> +	dup_anon_vma_name(orig, new);
> +
> +	return new;
> +}
> +
> +void vm_area_free(struct vm_area_struct *vma)
> +{
> +	/* The vma should be detached while being destroyed. */
> +	vma_assert_detached(vma);
> +	vma_numab_state_free(vma);
> +	free_anon_vma_name(vma);
> +	kmem_cache_free(vm_area_cachep, vma);
> +}
> diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
> index 624040fcf193..66f3831a668f 100644
> --- a/tools/testing/vma/Makefile
> +++ b/tools/testing/vma/Makefile
> @@ -9,7 +9,7 @@ include ../shared/shared.mk
>  OFILES = $(SHARED_OFILES) vma.o maple-shim.o
>  TARGETS = vma
>  
> -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
> +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
>  
>  vma:	$(OFILES)
>  	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index 5832ae5d797d..2be7597a2ac2 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
>   * Directly import the VMA implementation here. Our vma_internal.h wrapper
>   * provides userland-equivalent functionality for everything vma.c uses.
>   */
> +#include "../../../mm/vma_init.c"
>  #include "../../../mm/vma_exec.c"
>  #include "../../../mm/vma.c"
>  
> @@ -91,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
>  	return res;
>  }
>  
> +static void detach_free_vma(struct vm_area_struct *vma)
> +{
> +	vma_mark_detached(vma);
> +	vm_area_free(vma);
> +}
> +
>  /* Helper function to allocate a VMA and link it to the tree. */
>  static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>  						 unsigned long start,
> @@ -104,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>  		return NULL;
>  
>  	if (attach_vma(mm, vma)) {
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		return NULL;
>  	}
>  
> @@ -249,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
>  
>  	vma_iter_set(vmi, 0);
>  	for_each_vma(*vmi, vma) {
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		count++;
>  	}
>  
> @@ -319,7 +326,7 @@ static bool test_simple_merge(void)
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>  	ASSERT_EQ(vma->vm_flags, flags);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>  
>  	return true;
> @@ -361,7 +368,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x1000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	vma_iter_clear(&vmi);
>  
>  	vma = vma_next(&vmi);
> @@ -370,7 +377,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x2000);
>  	ASSERT_EQ(vma->vm_pgoff, 1);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	vma_iter_clear(&vmi);
>  
>  	vma = vma_next(&vmi);
> @@ -379,7 +386,7 @@ static bool test_simple_modify(void)
>  	ASSERT_EQ(vma->vm_end, 0x3000);
>  	ASSERT_EQ(vma->vm_pgoff, 2);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>  
>  	return true;
> @@ -407,7 +414,7 @@ static bool test_simple_expand(void)
>  	ASSERT_EQ(vma->vm_end, 0x3000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>  
>  	return true;
> @@ -428,7 +435,7 @@ static bool test_simple_shrink(void)
>  	ASSERT_EQ(vma->vm_end, 0x1000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
>  
> -	vm_area_free(vma);
> +	detach_free_vma(vma);
>  	mtree_destroy(&mm.mm_mt);
>  
>  	return true;
> @@ -619,7 +626,7 @@ static bool test_merge_new(void)
>  		ASSERT_EQ(vma->vm_pgoff, 0);
>  		ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
>  
> -		vm_area_free(vma);
> +		detach_free_vma(vma);
>  		count++;
>  	}
>  
> @@ -1668,6 +1675,7 @@ int main(void)
>  	int num_tests = 0, num_fail = 0;
>  
>  	maple_tree_init();
> +	vma_state_init();
>  
>  #define TEST(name)							\
>  	do {								\
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 32e990313158..198abe66de5a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -155,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
>   */
>  #define pr_warn_once pr_err
>  
> +#define data_race(expr) expr
> +
> +#define ASSERT_EXCLUSIVE_WRITER(x)
> +
>  struct kref {
>  	refcount_t refcount;
>  };
> @@ -255,6 +259,8 @@ struct file {
>  
>  #define VMA_LOCK_OFFSET	0x40000000
>  
> +typedef struct { unsigned long v; } freeptr_t;
> +
>  struct vm_area_struct {
>  	/* The first cache line has the info for VMA tree walking. */
>  
> @@ -264,9 +270,7 @@ struct vm_area_struct {
>  			unsigned long vm_start;
>  			unsigned long vm_end;
>  		};
> -#ifdef CONFIG_PER_VMA_LOCK
> -		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
> -#endif
> +		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
>  	};
>  
>  	struct mm_struct *vm_mm;	/* The address space we belong to. */
> @@ -463,6 +467,65 @@ struct pagetable_move_control {
>  		.len_in = len_,						\
>  	}
>  
> +struct kmem_cache_args {
> +	/**
> +	 * @align: The required alignment for the objects.
> +	 *
> +	 * %0 means no specific alignment is requested.
> +	 */
> +	unsigned int align;
> +	/**
> +	 * @useroffset: Usercopy region offset.
> +	 *
> +	 * %0 is a valid offset, when @usersize is non-%0
> +	 */
> +	unsigned int useroffset;
> +	/**
> +	 * @usersize: Usercopy region size.
> +	 *
> +	 * %0 means no usercopy region is specified.
> +	 */
> +	unsigned int usersize;
> +	/**
> +	 * @freeptr_offset: Custom offset for the free pointer
> +	 * in &SLAB_TYPESAFE_BY_RCU caches
> +	 *
> +	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
> +	 * outside of the object. This might cause the object to grow in size.
> +	 * Cache creators that have a reason to avoid this can specify a custom
> +	 * free pointer offset in their struct where the free pointer will be
> +	 * placed.
> +	 *
> +	 * Note that placing the free pointer inside the object requires the
> +	 * caller to ensure that no fields are invalidated that are required to
> +	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
> +	 * details).
> +	 *
> +	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
> +	 * is specified, %use_freeptr_offset must be set %true.
> +	 *
> +	 * Note that @ctor currently isn't supported with custom free pointers
> +	 * as a @ctor requires an external free pointer.
> +	 */
> +	unsigned int freeptr_offset;
> +	/**
> +	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
> +	 */
> +	bool use_freeptr_offset;
> +	/**
> +	 * @ctor: A constructor for the objects.
> +	 *
> +	 * The constructor is invoked for each object in a newly allocated slab
> +	 * page. It is the cache user's responsibility to free object in the
> +	 * same state as after calling the constructor, or deal appropriately
> +	 * with any differences between a freshly constructed and a reallocated
> +	 * object.
> +	 *
> +	 * %NULL means no constructor.
> +	 */
> +	void (*ctor)(void *);
> +};
> +
>  static inline void vma_iter_invalidate(struct vma_iterator *vmi)
>  {
>  	mas_pause(&vmi->mas);
> @@ -547,31 +610,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>  	vma->vm_lock_seq = UINT_MAX;
>  }
>  
> -static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> -{
> -	struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
> +struct kmem_cache {
> +	const char *name;
> +	size_t object_size;
> +	struct kmem_cache_args *args;
> +};
>  
> -	if (!vma)
> -		return NULL;
> +static inline struct kmem_cache *__kmem_cache_create(const char *name,
> +						     size_t object_size,
> +						     struct kmem_cache_args *args)
> +{
> +	struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
>  
> -	vma_init(vma, mm);
> +	ret->name = name;
> +	ret->object_size = object_size;
> +	ret->args = args;
>  
> -	return vma;
> +	return ret;
>  }
>  
> -static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> -{
> -	struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
> +#define kmem_cache_create(__name, __object_size, __args, ...)           \
> +	__kmem_cache_create((__name), (__object_size), (__args))
>  
> -	if (!new)
> -		return NULL;
> +static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
> +{
> +	(void)gfpflags;
>  
> -	memcpy(new, orig, sizeof(*new));
> -	refcount_set(&new->vm_refcnt, 0);
> -	new->vm_lock_seq = UINT_MAX;
> -	INIT_LIST_HEAD(&new->anon_vma_chain);
> +	return calloc(s->object_size, 1);
> +}
>  
> -	return new;
> +static inline void kmem_cache_free(struct kmem_cache *s, void *x)
> +{
> +	free(x);
>  }
>  
>  /*
> @@ -738,11 +808,6 @@ static inline void mpol_put(struct mempolicy *)
>  {
>  }
>  
> -static inline void vm_area_free(struct vm_area_struct *vma)
> -{
> -	free(vma);
> -}
> -
>  static inline void lru_add_drain(void)
>  {
>  }
> @@ -1312,4 +1377,32 @@ static inline void ksm_exit(struct mm_struct *mm)
>  	(void)mm;
>  }
>  
> +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
> +{
> +	(void)vma;
> +	(void)reset_refcnt;
> +}
> +
> +static inline void vma_numab_state_init(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
> +static inline void vma_numab_state_free(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
> +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
> +				     struct vm_area_struct *new_vma)
> +{
> +	(void)orig_vma;
> +	(void)new_vma;
> +}
> +
> +static inline void free_anon_vma_name(struct vm_area_struct *vma)
> +{
> +	(void)vma;
> +}
> +
>  #endif	/* __MM_VMA_INTERNAL_H */
> -- 
> 2.49.0
>
Re: [PATCH v3 4/4] mm: perform VMA allocation, freeing, duplication in mm
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
On Mon, Apr 28, 2025 at 03:14:46PM -0400, Liam R. Howlett wrote:
> * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250428 11:28]:
> > Right now these are performed in kernel/fork.c which is odd and a violation
> > of separation of concerns, as well as preventing us from integrating this
> > and related logic into userland VMA testing going forward, and perhaps more
> > importantly - enabling us to, in a subsequent commit, make VMA
> > allocation/freeing a purely internal mm operation.
> >
> > There is a fly in the ointment - nommu - mmap.c is not compiled if
> > CONFIG_MMU not set, and neither is vma.c.
> >
> > To square the circle, let's add a new file - vma_init.c. This will be
> > compiled for both CONFIG_MMU and nommu builds, and will also form part of
> > the VMA userland testing.
> >
> > This allows us to de-duplicate code, while maintaining separation of
> > concerns and the ability for us to userland test this logic.
> >
> > Update the VMA userland tests accordingly, additionally adding a
> > detach_free_vma() helper function to correctly detach VMAs before freeing
> > them in test code, as this change was triggering the assert for this.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> One small nit below.
>
> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

Thanks!

I see Andrew already fixed up the nit :)
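
For reference, the fixup just drops the stray blank line from the mm/vma.h hunk -
roughly the following, hand-sketched rather than the literal mm-tree fixup:

 int __vm_munmap(unsigned long start, size_t len, bool unlock);
 
-
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);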

>
> > ---
> >  MAINTAINERS                      |   1 +
> >  kernel/fork.c                    |  88 -------------------
> >  mm/Makefile                      |   2 +-
> >  mm/mmap.c                        |   3 +-
> >  mm/nommu.c                       |   4 +-
> >  mm/vma.h                         |   7 ++
> >  mm/vma_init.c                    | 101 ++++++++++++++++++++++
> >  tools/testing/vma/Makefile       |   2 +-
> >  tools/testing/vma/vma.c          |  26 ++++--
> >  tools/testing/vma/vma_internal.h | 143 +++++++++++++++++++++++++------
> >  10 files changed, 251 insertions(+), 126 deletions(-)
> >  create mode 100644 mm/vma_init.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 1ee1c22e6e36..d274e6802ba5 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -15656,6 +15656,7 @@ F:	mm/mseal.c
> >  F:	mm/vma.c
> >  F:	mm/vma.h
> >  F:	mm/vma_exec.c
> > +F:	mm/vma_init.c
> >  F:	mm/vma_internal.h
> >  F:	tools/testing/selftests/mm/merge.c
> >  F:	tools/testing/vma/
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index ac9f9267a473..9e4616dacd82 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -431,88 +431,9 @@ struct kmem_cache *files_cachep;
> >  /* SLAB cache for fs_struct structures (tsk->fs) */
> >  struct kmem_cache *fs_cachep;
> >
> > -/* SLAB cache for vm_area_struct structures */
> > -static struct kmem_cache *vm_area_cachep;
> > -
> >  /* SLAB cache for mm_struct structures (tsk->mm) */
> >  static struct kmem_cache *mm_cachep;
> >
> > -struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> > -{
> > -	struct vm_area_struct *vma;
> > -
> > -	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> > -	if (!vma)
> > -		return NULL;
> > -
> > -	vma_init(vma, mm);
> > -
> > -	return vma;
> > -}
> > -
> > -static void vm_area_init_from(const struct vm_area_struct *src,
> > -			      struct vm_area_struct *dest)
> > -{
> > -	dest->vm_mm = src->vm_mm;
> > -	dest->vm_ops = src->vm_ops;
> > -	dest->vm_start = src->vm_start;
> > -	dest->vm_end = src->vm_end;
> > -	dest->anon_vma = src->anon_vma;
> > -	dest->vm_pgoff = src->vm_pgoff;
> > -	dest->vm_file = src->vm_file;
> > -	dest->vm_private_data = src->vm_private_data;
> > -	vm_flags_init(dest, src->vm_flags);
> > -	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> > -	       sizeof(dest->vm_page_prot));
> > -	/*
> > -	 * src->shared.rb may be modified concurrently when called from
> > -	 * dup_mmap(), but the clone will reinitialize it.
> > -	 */
> > -	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> > -	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> > -	       sizeof(dest->vm_userfaultfd_ctx));
> > -#ifdef CONFIG_ANON_VMA_NAME
> > -	dest->anon_name = src->anon_name;
> > -#endif
> > -#ifdef CONFIG_SWAP
> > -	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> > -	       sizeof(dest->swap_readahead_info));
> > -#endif
> > -#ifndef CONFIG_MMU
> > -	dest->vm_region = src->vm_region;
> > -#endif
> > -#ifdef CONFIG_NUMA
> > -	dest->vm_policy = src->vm_policy;
> > -#endif
> > -}
> > -
> > -struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> > -{
> > -	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> > -
> > -	if (!new)
> > -		return NULL;
> > -
> > -	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> > -	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> > -	vm_area_init_from(orig, new);
> > -	vma_lock_init(new, true);
> > -	INIT_LIST_HEAD(&new->anon_vma_chain);
> > -	vma_numab_state_init(new);
> > -	dup_anon_vma_name(orig, new);
> > -
> > -	return new;
> > -}
> > -
> > -void vm_area_free(struct vm_area_struct *vma)
> > -{
> > -	/* The vma should be detached while being destroyed. */
> > -	vma_assert_detached(vma);
> > -	vma_numab_state_free(vma);
> > -	free_anon_vma_name(vma);
> > -	kmem_cache_free(vm_area_cachep, vma);
> > -}
> > -
> >  static void account_kernel_stack(struct task_struct *tsk, int account)
> >  {
> >  	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
> > @@ -3033,11 +2954,6 @@ void __init mm_cache_init(void)
> >
> >  void __init proc_caches_init(void)
> >  {
> > -	struct kmem_cache_args args = {
> > -		.use_freeptr_offset = true,
> > -		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> > -	};
> > -
> >  	sighand_cachep = kmem_cache_create("sighand_cache",
> >  			sizeof(struct sighand_struct), 0,
> >  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> > @@ -3054,10 +2970,6 @@ void __init proc_caches_init(void)
> >  			sizeof(struct fs_struct), 0,
> >  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
> >  			NULL);
> > -	vm_area_cachep = kmem_cache_create("vm_area_struct",
> > -			sizeof(struct vm_area_struct), &args,
> > -			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> > -			SLAB_ACCOUNT);
> >  	mmap_init();
> >  	nsproxy_cache_init();
> >  }
> > diff --git a/mm/Makefile b/mm/Makefile
> > index 15a901bb431a..690ddcf7d9a1 100644
> > --- a/mm/Makefile
> > +++ b/mm/Makefile
> > @@ -55,7 +55,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
> >  			   mm_init.o percpu.o slab_common.o \
> >  			   compaction.o show_mem.o \
> >  			   interval_tree.o list_lru.o workingset.o \
> > -			   debug.o gup.o mmap_lock.o $(mmu-y)
> > +			   debug.o gup.o mmap_lock.o vma_init.o $(mmu-y)
> >
> >  # Give 'page_alloc' its own module-parameter namespace
> >  page-alloc-y := page_alloc.o
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index 5259df031e15..81dd962a1cfc 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -1554,7 +1554,7 @@ static const struct ctl_table mmap_table[] = {
> >  #endif /* CONFIG_SYSCTL */
> >
> >  /*
> > - * initialise the percpu counter for VM
> > + * initialise the percpu counter for VM, initialise VMA state.
> >   */
> >  void __init mmap_init(void)
> >  {
> > @@ -1565,6 +1565,7 @@ void __init mmap_init(void)
> >  #ifdef CONFIG_SYSCTL
> >  	register_sysctl_init("vm", mmap_table);
> >  #endif
> > +	vma_state_init();
> >  }
> >
> >  /*
> > diff --git a/mm/nommu.c b/mm/nommu.c
> > index a142fc258d39..0bf4849b8204 100644
> > --- a/mm/nommu.c
> > +++ b/mm/nommu.c
> > @@ -399,7 +399,8 @@ static const struct ctl_table nommu_table[] = {
> >  };
> >
> >  /*
> > - * initialise the percpu counter for VM and region record slabs
> > + * initialise the percpu counter for VM and region record slabs, initialise VMA
> > + * state.
> >   */
> >  void __init mmap_init(void)
> >  {
> > @@ -409,6 +410,7 @@ void __init mmap_init(void)
> >  	VM_BUG_ON(ret);
> >  	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
> >  	register_sysctl_init("vm", nommu_table);
> > +	vma_state_init();
> >  }
> >
> >  /*
> > diff --git a/mm/vma.h b/mm/vma.h
> > index 94307a2e4ab6..4a1e1768ca46 100644
> > --- a/mm/vma.h
> > +++ b/mm/vma.h
> > @@ -548,8 +548,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
> >
> >  int __vm_munmap(unsigned long start, size_t len, bool unlock);
> >
> > +
>
> Accidental extra line here?
>
> >  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma);
> >
> > +/* vma_init.h, shared between CONFIG_MMU and nommu. */
> > +void __init vma_state_init(void);
> > +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm);
> > +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig);
> > +void vm_area_free(struct vm_area_struct *vma);
> > +
> >  /* vma_exec.h */
> >  #ifdef CONFIG_MMU
> >  int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
> > diff --git a/mm/vma_init.c b/mm/vma_init.c
> > new file mode 100644
> > index 000000000000..967ca8517986
> > --- /dev/null
> > +++ b/mm/vma_init.c
> > @@ -0,0 +1,101 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +
> > +/*
> > > + * Functions for initialising, allocating, freeing and duplicating VMAs. Shared
> > + * between CONFIG_MMU and non-CONFIG_MMU kernel configurations.
> > + */
> > +
> > +#include "vma_internal.h"
> > +#include "vma.h"
> > +
> > +/* SLAB cache for vm_area_struct structures */
> > +static struct kmem_cache *vm_area_cachep;
> > +
> > +void __init vma_state_init(void)
> > +{
> > +	struct kmem_cache_args args = {
> > +		.use_freeptr_offset = true,
> > +		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
> > +	};
> > +
> > +	vm_area_cachep = kmem_cache_create("vm_area_struct",
> > +			sizeof(struct vm_area_struct), &args,
> > +			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
> > +			SLAB_ACCOUNT);
> > +}
> > +
> > +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> > +{
> > +	struct vm_area_struct *vma;
> > +
> > +	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> > +	if (!vma)
> > +		return NULL;
> > +
> > +	vma_init(vma, mm);
> > +
> > +	return vma;
> > +}
> > +
> > +static void vm_area_init_from(const struct vm_area_struct *src,
> > +			      struct vm_area_struct *dest)
> > +{
> > +	dest->vm_mm = src->vm_mm;
> > +	dest->vm_ops = src->vm_ops;
> > +	dest->vm_start = src->vm_start;
> > +	dest->vm_end = src->vm_end;
> > +	dest->anon_vma = src->anon_vma;
> > +	dest->vm_pgoff = src->vm_pgoff;
> > +	dest->vm_file = src->vm_file;
> > +	dest->vm_private_data = src->vm_private_data;
> > +	vm_flags_init(dest, src->vm_flags);
> > +	memcpy(&dest->vm_page_prot, &src->vm_page_prot,
> > +	       sizeof(dest->vm_page_prot));
> > +	/*
> > +	 * src->shared.rb may be modified concurrently when called from
> > +	 * dup_mmap(), but the clone will reinitialize it.
> > +	 */
> > +	data_race(memcpy(&dest->shared, &src->shared, sizeof(dest->shared)));
> > +	memcpy(&dest->vm_userfaultfd_ctx, &src->vm_userfaultfd_ctx,
> > +	       sizeof(dest->vm_userfaultfd_ctx));
> > +#ifdef CONFIG_ANON_VMA_NAME
> > +	dest->anon_name = src->anon_name;
> > +#endif
> > +#ifdef CONFIG_SWAP
> > +	memcpy(&dest->swap_readahead_info, &src->swap_readahead_info,
> > +	       sizeof(dest->swap_readahead_info));
> > +#endif
> > +#ifndef CONFIG_MMU
> > +	dest->vm_region = src->vm_region;
> > +#endif
> > +#ifdef CONFIG_NUMA
> > +	dest->vm_policy = src->vm_policy;
> > +#endif
> > +}
> > +
> > +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> > +{
> > +	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> > +
> > +	if (!new)
> > +		return NULL;
> > +
> > +	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
> > +	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
> > +	vm_area_init_from(orig, new);
> > +	vma_lock_init(new, true);
> > +	INIT_LIST_HEAD(&new->anon_vma_chain);
> > +	vma_numab_state_init(new);
> > +	dup_anon_vma_name(orig, new);
> > +
> > +	return new;
> > +}
> > +
> > +void vm_area_free(struct vm_area_struct *vma)
> > +{
> > +	/* The vma should be detached while being destroyed. */
> > +	vma_assert_detached(vma);
> > +	vma_numab_state_free(vma);
> > +	free_anon_vma_name(vma);
> > +	kmem_cache_free(vm_area_cachep, vma);
> > +}
> > diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
> > index 624040fcf193..66f3831a668f 100644
> > --- a/tools/testing/vma/Makefile
> > +++ b/tools/testing/vma/Makefile
> > @@ -9,7 +9,7 @@ include ../shared/shared.mk
> >  OFILES = $(SHARED_OFILES) vma.o maple-shim.o
> >  TARGETS = vma
> >
> > -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
> > +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
> >
> >  vma:	$(OFILES)
> >  	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
> > diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> > index 5832ae5d797d..2be7597a2ac2 100644
> > --- a/tools/testing/vma/vma.c
> > +++ b/tools/testing/vma/vma.c
> > @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
> >   * Directly import the VMA implementation here. Our vma_internal.h wrapper
> >   * provides userland-equivalent functionality for everything vma.c uses.
> >   */
> > +#include "../../../mm/vma_init.c"
> >  #include "../../../mm/vma_exec.c"
> >  #include "../../../mm/vma.c"
> >
> > @@ -91,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
> >  	return res;
> >  }
> >
> > +static void detach_free_vma(struct vm_area_struct *vma)
> > +{
> > +	vma_mark_detached(vma);
> > +	vm_area_free(vma);
> > +}
> > +
> >  /* Helper function to allocate a VMA and link it to the tree. */
> >  static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
> >  						 unsigned long start,
> > @@ -104,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
> >  		return NULL;
> >
> >  	if (attach_vma(mm, vma)) {
> > -		vm_area_free(vma);
> > +		detach_free_vma(vma);
> >  		return NULL;
> >  	}
> >
> > @@ -249,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
> >
> >  	vma_iter_set(vmi, 0);
> >  	for_each_vma(*vmi, vma) {
> > -		vm_area_free(vma);
> > +		detach_free_vma(vma);
> >  		count++;
> >  	}
> >
> > @@ -319,7 +326,7 @@ static bool test_simple_merge(void)
> >  	ASSERT_EQ(vma->vm_pgoff, 0);
> >  	ASSERT_EQ(vma->vm_flags, flags);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	mtree_destroy(&mm.mm_mt);
> >
> >  	return true;
> > @@ -361,7 +368,7 @@ static bool test_simple_modify(void)
> >  	ASSERT_EQ(vma->vm_end, 0x1000);
> >  	ASSERT_EQ(vma->vm_pgoff, 0);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	vma_iter_clear(&vmi);
> >
> >  	vma = vma_next(&vmi);
> > @@ -370,7 +377,7 @@ static bool test_simple_modify(void)
> >  	ASSERT_EQ(vma->vm_end, 0x2000);
> >  	ASSERT_EQ(vma->vm_pgoff, 1);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	vma_iter_clear(&vmi);
> >
> >  	vma = vma_next(&vmi);
> > @@ -379,7 +386,7 @@ static bool test_simple_modify(void)
> >  	ASSERT_EQ(vma->vm_end, 0x3000);
> >  	ASSERT_EQ(vma->vm_pgoff, 2);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	mtree_destroy(&mm.mm_mt);
> >
> >  	return true;
> > @@ -407,7 +414,7 @@ static bool test_simple_expand(void)
> >  	ASSERT_EQ(vma->vm_end, 0x3000);
> >  	ASSERT_EQ(vma->vm_pgoff, 0);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	mtree_destroy(&mm.mm_mt);
> >
> >  	return true;
> > @@ -428,7 +435,7 @@ static bool test_simple_shrink(void)
> >  	ASSERT_EQ(vma->vm_end, 0x1000);
> >  	ASSERT_EQ(vma->vm_pgoff, 0);
> >
> > -	vm_area_free(vma);
> > +	detach_free_vma(vma);
> >  	mtree_destroy(&mm.mm_mt);
> >
> >  	return true;
> > @@ -619,7 +626,7 @@ static bool test_merge_new(void)
> >  		ASSERT_EQ(vma->vm_pgoff, 0);
> >  		ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
> >
> > -		vm_area_free(vma);
> > +		detach_free_vma(vma);
> >  		count++;
> >  	}
> >
> > @@ -1668,6 +1675,7 @@ int main(void)
> >  	int num_tests = 0, num_fail = 0;
> >
> >  	maple_tree_init();
> > +	vma_state_init();
> >
> >  #define TEST(name)							\
> >  	do {								\
> > diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> > index 32e990313158..198abe66de5a 100644
> > --- a/tools/testing/vma/vma_internal.h
> > +++ b/tools/testing/vma/vma_internal.h
> > @@ -155,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
> >   */
> >  #define pr_warn_once pr_err
> >
> > +#define data_race(expr) expr
> > +
> > +#define ASSERT_EXCLUSIVE_WRITER(x)
> > +
> >  struct kref {
> >  	refcount_t refcount;
> >  };
> > @@ -255,6 +259,8 @@ struct file {
> >
> >  #define VMA_LOCK_OFFSET	0x40000000
> >
> > +typedef struct { unsigned long v; } freeptr_t;
> > +
> >  struct vm_area_struct {
> >  	/* The first cache line has the info for VMA tree walking. */
> >
> > @@ -264,9 +270,7 @@ struct vm_area_struct {
> >  			unsigned long vm_start;
> >  			unsigned long vm_end;
> >  		};
> > -#ifdef CONFIG_PER_VMA_LOCK
> > -		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
> > -#endif
> > +		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
> >  	};
> >
> >  	struct mm_struct *vm_mm;	/* The address space we belong to. */
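
This mirrors the kernel's vm_area_struct: the per-object RCU head goes
away because the cache is SLAB_TYPESAFE_BY_RCU with an in-object free
pointer, and vm_freeptr overlays vm_start/vm_end in the union, fields
that carry no meaning once the VMA sits on a freelist. A hypothetical
compile-time check of that layout assumption (not part of the patch):

	_Static_assert(sizeof(freeptr_t) <= 2 * sizeof(unsigned long),
		       "vm_freeptr must fit in the vm_start/vm_end slot");
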
> > @@ -463,6 +467,65 @@ struct pagetable_move_control {
> >  		.len_in = len_,						\
> >  	}
> >
> > +struct kmem_cache_args {
> > +	/**
> > +	 * @align: The required alignment for the objects.
> > +	 *
> > +	 * %0 means no specific alignment is requested.
> > +	 */
> > +	unsigned int align;
> > +	/**
> > +	 * @useroffset: Usercopy region offset.
> > +	 *
> > +	 * %0 is a valid offset when @usersize is non-%0.
> > +	 */
> > +	unsigned int useroffset;
> > +	/**
> > +	 * @usersize: Usercopy region size.
> > +	 *
> > +	 * %0 means no usercopy region is specified.
> > +	 */
> > +	unsigned int usersize;
> > +	/**
> > +	 * @freeptr_offset: Custom offset for the free pointer
> > +	 * in &SLAB_TYPESAFE_BY_RCU caches
> > +	 *
> > +	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
> > +	 * outside of the object. This might cause the object to grow in size.
> > +	 * Cache creators that have a reason to avoid this can specify a custom
> > +	 * free pointer offset in their struct where the free pointer will be
> > +	 * placed.
> > +	 *
> > +	 * Note that placing the free pointer inside the object requires the
> > +	 * caller to ensure that no fields are invalidated that are required to
> > +	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
> > +	 * details).
> > +	 *
> > +	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
> > +	 * is specified, %use_freeptr_offset must be set to %true.
> > +	 *
> > +	 * Note that @ctor currently isn't supported with custom free pointers
> > +	 * as a @ctor requires an external free pointer.
> > +	 */
> > +	unsigned int freeptr_offset;
> > +	/**
> > +	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
> > +	 */
> > +	bool use_freeptr_offset;
> > +	/**
> > +	 * @ctor: A constructor for the objects.
> > +	 *
> > +	 * The constructor is invoked for each object in a newly allocated slab
> > +	 * page. It is the cache user's responsibility to free the object in the
> > +	 * same state as after calling the constructor, or deal appropriately
> > +	 * with any differences between a freshly constructed and a reallocated
> > +	 * object.
> > +	 *
> > +	 * %NULL means no constructor.
> > +	 */
> > +	void (*ctor)(void *);
> > +};
> > +
> >  static inline void vma_iter_invalidate(struct vma_iterator *vmi)
> >  {
> >  	mas_pause(&vmi->mas);
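
The struct mirrors the kernel's kmem_cache_args closely enough that the
shared cache-creation code compiles unchanged. The freeptr_offset
machinery is what lets the vma cache keep its free pointer inside the
object; a sketch of a creation call in the style of the code this series
moves out of kernel/fork.c (the exact slab flags are an assumption here):

	struct kmem_cache_args args = {
		.use_freeptr_offset = true,
		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
	};

	vm_area_cachep = kmem_cache_create("vm_area_struct",
			sizeof(struct vm_area_struct), &args,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_TYPESAFE_BY_RCU);
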
> > @@ -547,31 +610,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
> >  	vma->vm_lock_seq = UINT_MAX;
> >  }
> >
> > -static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
> > -{
> > -	struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
> > +struct kmem_cache {
> > +	const char *name;
> > +	size_t object_size;
> > +	struct kmem_cache_args *args;
> > +};
> >
> > -	if (!vma)
> > -		return NULL;
> > +static inline struct kmem_cache *__kmem_cache_create(const char *name,
> > +						     size_t object_size,
> > +						     struct kmem_cache_args *args)
> > +{
> > +	struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
> >
> > -	vma_init(vma, mm);
> > +	ret->name = name;
> > +	ret->object_size = object_size;
> > +	ret->args = args;
> >
> > -	return vma;
> > +	return ret;
> >  }
> >
> > -static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
> > -{
> > -	struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
> > +#define kmem_cache_create(__name, __object_size, __args, ...)           \
> > +	__kmem_cache_create((__name), (__object_size), (__args))
> >
> > -	if (!new)
> > -		return NULL;
> > +static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
> > +{
> > +	(void)gfpflags;
> >
> > -	memcpy(new, orig, sizeof(*new));
> > -	refcount_set(&new->vm_refcnt, 0);
> > -	new->vm_lock_seq = UINT_MAX;
> > -	INIT_LIST_HEAD(&new->anon_vma_chain);
> > +	return calloc(1, s->object_size);
> > +}
> >
> > -	return new;
> > +static inline void kmem_cache_free(struct kmem_cache *s, void *x)
> > +{
> > +	free(x);
> >  }
> >
> >  /*
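
Two properties of these stubs are worth noting: kmem_cache_alloc() hands
back zeroed memory via calloc(), matching what the old calloc()-based VMA
stubs provided, and __kmem_cache_create() does not check the malloc()
return, which is tolerable in a test binary that would fail loudly anyway.
A sketch of the stubs in use (the gfp argument is simply ignored):

	struct kmem_cache *cache;
	struct vm_area_struct *vma;

	cache = kmem_cache_create("vm_area_struct",
				  sizeof(struct vm_area_struct), NULL);
	vma = kmem_cache_alloc(cache, GFP_KERNEL);	/* zeroed allocation */
	kmem_cache_free(cache, vma);			/* plain free() */
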
> > @@ -738,11 +808,6 @@ static inline void mpol_put(struct mempolicy *)
> >  {
> >  }
> >
> > -static inline void vm_area_free(struct vm_area_struct *vma)
> > -{
> > -	free(vma);
> > -}
> > -
> >  static inline void lru_add_drain(void)
> >  {
> >  }
> > @@ -1312,4 +1377,32 @@ static inline void ksm_exit(struct mm_struct *mm)
> >  	(void)mm;
> >  }
> >
> > +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
> > +{
> > +	(void)vma;
> > +	(void)reset_refcnt;
> > +}
> > +
> > +static inline void vma_numab_state_init(struct vm_area_struct *vma)
> > +{
> > +	(void)vma;
> > +}
> > +
> > +static inline void vma_numab_state_free(struct vm_area_struct *vma)
> > +{
> > +	(void)vma;
> > +}
> > +
> > +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
> > +				     struct vm_area_struct *new_vma)
> > +{
> > +	(void)orig_vma;
> > +	(void)new_vma;
> > +}
> > +
> > +static inline void free_anon_vma_name(struct vm_area_struct *vma)
> > +{
> > +	(void)vma;
> > +}
> > +
> >  #endif	/* __MM_VMA_INTERNAL_H */
> > --
> > 2.49.0
> >
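
Taken together with the no-op stubs above (vma_lock_init(), the NUMA
balancing and anon_vma_name helpers), the test binary now links the real
allocation/freeing/duplication logic from mm/vma_init.c rather than a
parallel reimplementation. One consequence, sketched below, is that a
test freeing a still-attached VMA hits the same assert the kernel would,
which is what motivated the detach_free_vma() helper:

	struct vm_area_struct *vma;

	vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, flags);
	/* vm_area_free(vma) here would fire the detach assert; instead: */
	detach_free_vma(vma);
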