[PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs

Lorenzo Stoakes posted 4 patches 8 months, 2 weeks ago
[PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Lorenzo Stoakes 8 months, 2 weeks ago
If a user wishes to enable KSM mergeability for an entire process and all
fork/exec'd processes that come after it, they use the prctl()
PR_SET_MEMORY_MERGE operation.

This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set
(in order to indicate they are KSM mergeable), as well as setting this flag
for all existing VMAs and propagating this across fork/exec.

However it also breaks VMA merging for new VMAs, both in the process and
all forked (and fork/exec'd) child processes.

This is because when a new mapping is proposed, the flags specified will
never have VM_MERGEABLE set. However all adjacent VMAs will already have
VM_MERGEABLE set, rendering VMAs unmergeable by default.

To work around this, we try to set the VM_MERGEABLE flag prior to
attempting a merge. In the case of brk() this can always be done.

However on mmap() things are more complicated - while KSM is not supported
for MAP_SHARED file-backed mappings, it is supported for MAP_PRIVATE
file-backed mappings.

These mappings may have deprecated .mmap() callbacks specified which could,
in theory, adjust flags and thus KSM eligibility.

So we check to determine whether this is possible. If not, we set
VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the
previous behaviour.

This fixes VMA merging for all new anonymous mappings, which covers the
majority of real-world cases, so we should see a significant improvement in
VMA mergeability.

For MAP_PRIVATE file-backed mappings, those which implement the
.mmap_prepare() hook and shmem are both known to be safe, so we allow
these, disallowing all other cases.

Also add stubs for newly introduced function invocations to VMA userland
testing.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process") # please no backport!
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
 include/linux/ksm.h              |  8 +++---
 mm/ksm.c                         | 18 ++++++++-----
 mm/vma.c                         | 44 ++++++++++++++++++++++++++++++--
 tools/testing/vma/vma_internal.h | 11 ++++++++
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index d73095b5cd96..51787f0b0208 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,8 +17,8 @@
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
-
-void ksm_add_vma(struct vm_area_struct *vma);
+vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+			 vm_flags_t vm_flags);
 int ksm_enable_merge_any(struct mm_struct *mm);
 int ksm_disable_merge_any(struct mm_struct *mm);
 int ksm_disable(struct mm_struct *mm);
@@ -97,8 +97,10 @@ bool ksm_process_mergeable(struct mm_struct *mm);
 
 #else  /* !CONFIG_KSM */
 
-static inline void ksm_add_vma(struct vm_area_struct *vma)
+static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+		const struct file *file, vm_flags_t vm_flags)
 {
+	return vm_flags;
 }
 
 static inline int ksm_disable(struct mm_struct *mm)
diff --git a/mm/ksm.c b/mm/ksm.c
index d0c763abd499..18b3690bb69a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2731,16 +2731,22 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
 	return 0;
 }
 /**
- * ksm_add_vma - Mark vma as mergeable if compatible
+ * ksm_vma_flags - Update VMA flags to mark as mergeable if compatible
  *
- * @vma:  Pointer to vma
+ * @mm:       Proposed VMA's mm_struct
+ * @file:     Proposed VMA's file-backed mapping, if any.
+ * @vm_flags: Proposed VMA's flags.
+ *
+ * Returns: @vm_flags possibly updated to mark mergeable.
  */
-void ksm_add_vma(struct vm_area_struct *vma)
+vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+			 vm_flags_t vm_flags)
 {
-	struct mm_struct *mm = vma->vm_mm;
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags) &&
+	    __ksm_should_add_vma(file, vm_flags))
+		vm_flags |= VM_MERGEABLE;
 
-	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
-		__ksm_add_vma(vma);
+	return vm_flags;
 }
 
 static void ksm_add_vmas(struct mm_struct *mm)
diff --git a/mm/vma.c b/mm/vma.c
index 7ebc9eb608f4..3e351beb82ca 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2490,7 +2490,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 	 */
 	if (!vma_is_anonymous(vma))
 		khugepaged_enter_vma(vma, map->flags);
-	ksm_add_vma(vma);
 	*vmap = vma;
 	return 0;
 
@@ -2593,6 +2592,40 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma,
 	vma->vm_private_data = map->vm_private_data;
 }
 
+static void update_ksm_flags(struct mmap_state *map)
+{
+	map->flags = ksm_vma_flags(map->mm, map->file, map->flags);
+}
+
+/*
+ * Are we guaranteed no driver can change state such as to preclude KSM merging?
+ * If so, let's set the KSM mergeable flag early so we don't break VMA merging.
+ */
+static bool can_set_ksm_flags_early(struct mmap_state *map)
+{
+	struct file *file = map->file;
+
+	/* Anonymous mappings have no driver which can change them. */
+	if (!file)
+		return true;
+
+	/*
+	 * If .mmap_prepare() is specified, then the driver will have already
+	 * manipulated state prior to updating KSM flags. So no need to worry
+	 * about mmap callbacks modifying VMA flags after the KSM flag has been
+	 * updated here, which could otherwise affect KSM eligibility.
+	 */
+	if (file->f_op->mmap_prepare)
+		return true;
+
+	/* shmem is safe. */
+	if (shmem_file(file))
+		return true;
+
+	/* Any other .mmap callback is not safe. */
+	return false;
+}
+
 static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
@@ -2603,6 +2636,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
 	VMA_ITERATOR(vmi, mm, addr);
 	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
+	bool check_ksm_early = can_set_ksm_flags_early(&map);
 
 	error = __mmap_prepare(&map, uf);
 	if (!error && have_mmap_prepare)
@@ -2610,6 +2644,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 	if (error)
 		goto abort_munmap;
 
+	if (check_ksm_early)
+		update_ksm_flags(&map);
+
 	/* Attempt to merge with adjacent VMAs... */
 	if (map.prev || map.next) {
 		VMG_MMAP_STATE(vmg, &map, /* vma = */ NULL);
@@ -2619,6 +2656,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 
 	/* ...but if we can't, allocate a new VMA. */
 	if (!vma) {
+		if (!check_ksm_early)
+			update_ksm_flags(&map);
+
 		error = __mmap_new_vma(&map, &vma);
 		if (error)
 			goto unacct_error;
@@ -2721,6 +2761,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	 * Note: This happens *after* clearing old mappings in some code paths.
 	 */
 	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+	flags = ksm_vma_flags(mm, NULL, flags);
 	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
@@ -2764,7 +2805,6 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
 	mm->map_count++;
 	validate_mm(mm);
-	ksm_add_vma(vma);
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 4505b1c31be1..77b2949d874a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -1468,4 +1468,15 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
 	(void)vma;
 }
 
+static inline bool shmem_file(struct file *)
+{
+	return false;
+}
+
+static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *,
+			 vm_flags_t vm_flags)
+{
+	return vm_flags;
+}
+
 #endif	/* __MM_VMA_INTERNAL_H */
-- 
2.49.0
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
Hi Andrew,

Sending a fix-patch for this commit due to a reported syzbot issue which
highlighted a bug in the implementation.

I discuss the syzbot report at [0].

[0]: https://lore.kernel.org/all/a55beb72-4288-4356-9642-76ab35a2a07c@lucifer.local/

There's a very minor conflict around the map->vm_flags vs. map->flags change,
easily resolvable, but if you need a respin let me know.

I ran through all mm self tests including the newly introduced one in 4/4 and all
good.

Thanks, Lorenzo

----8<----
From 4d9dde3013837595d733b5059c2d6474261654d6 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Fri, 20 Jun 2025 13:21:03 +0100
Subject: [PATCH] mm/vma: correctly invoke late KSM check after mmap hook

Previously we erroneously checked whether KSM was applicable prior to
invoking the f_op->mmap() hook in the case of not being able to perform
this check early.

This is problematic, as filesystems such as hugetlb, which use anonymous
memory and might otherwise get KSM'd, set VM_HUGETLB in the f_op->mmap()
hook.

Correct this by checking at the appropriate time.

Reported-by: syzbot+a74a028d848147bc5931@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6853fc57.a00a0220.137b3.0009.GAE@google.com/
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/vma.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/mm/vma.c b/mm/vma.c
index 4abed296d882..eccc4e0b4d32 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -32,6 +32,9 @@ struct mmap_state {
 	struct vma_munmap_struct vms;
 	struct ma_state mas_detach;
 	struct maple_tree mt_detach;
+
+	/* Determine if we can check KSM flags early in mmap() logic. */
+	bool check_ksm_early;
 };

 #define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
@@ -2334,6 +2337,11 @@ static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
 	vms_complete_munmap_vmas(vms, mas_detach);
 }

+static void update_ksm_flags(struct mmap_state *map)
+{
+	map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
+}
+
 /*
  * __mmap_prepare() - Prepare to gather any overlapping VMAs that need to be
  * unmapped once the map operation is completed, check limits, account mapping
@@ -2438,6 +2446,7 @@ static int __mmap_new_file_vma(struct mmap_state *map,
 			!(map->vm_flags & VM_MAYWRITE) &&
 			(vma->vm_flags & VM_MAYWRITE));

+	map->file = vma->vm_file;
 	map->vm_flags = vma->vm_flags;

 	return 0;
@@ -2487,6 +2496,11 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 	if (error)
 		goto free_iter_vma;

+	if (!map->check_ksm_early) {
+		update_ksm_flags(map);
+		vm_flags_init(vma, map->vm_flags);
+	}
+
 #ifdef CONFIG_SPARC64
 	/* TODO: Fix SPARC ADI! */
 	WARN_ON_ONCE(!arch_validate_flags(map->vm_flags));
@@ -2606,11 +2620,6 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma,
 	vma->vm_private_data = map->vm_private_data;
 }

-static void update_ksm_flags(struct mmap_state *map)
-{
-	map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
-}
-
 /*
  * Are we guaranteed no driver can change state such as to preclude KSM merging?
  * If so, let's set the KSM mergeable flag early so we don't break VMA merging.
@@ -2650,7 +2659,8 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
 	VMA_ITERATOR(vmi, mm, addr);
 	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
-	bool check_ksm_early = can_set_ksm_flags_early(&map);
+
+	map.check_ksm_early = can_set_ksm_flags_early(&map);

 	error = __mmap_prepare(&map, uf);
 	if (!error && have_mmap_prepare)
@@ -2658,7 +2668,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
 	if (error)
 		goto abort_munmap;

-	if (check_ksm_early)
+	if (map.check_ksm_early)
 		update_ksm_flags(&map);

 	/* Attempt to merge with adjacent VMAs... */
@@ -2670,9 +2680,6 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,

 	/* ...but if we can't, allocate a new VMA. */
 	if (!vma) {
-		if (!check_ksm_early)
-			update_ksm_flags(&map);
-
 		error = __mmap_new_vma(&map, &vma);
 		if (error)
 			goto unacct_error;
--
2.49.0
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Vlastimil Babka 7 months, 3 weeks ago
On 6/20/25 14:48, Lorenzo Stoakes wrote:
> Hi Andrew,
> 
> Sending a fix-patch for this commit due to a reported syzbot issue which
> highlighted a bug in the implementation.
> 
> I discuss the syzbot report at [0].
> 
> [0]: https://lore.kernel.org/all/a55beb72-4288-4356-9642-76ab35a2a07c@lucifer.local/
> 
> There's a very minor conflict around the map->vm_flags vs. map->flags change,
> easily resolvable, but if you need a respin let me know.
> 
> I ran through all mm self tests included the newly introduced one in 4/4 and all
> good.
> 
> Thanks, Lorenzo
> 
> ----8<----
> From 4d9dde3013837595d733b5059c2d6474261654d6 Mon Sep 17 00:00:00 2001
> From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Date: Fri, 20 Jun 2025 13:21:03 +0100
> Subject: [PATCH] mm/vma: correctly invoke late KSM check after mmap hook
> 
> Previously we erroneously checked whether KSM was applicable prior to
> invoking the f_op->mmap() hook in the case of not being able to perform
> this check early.
> 
> This is problematic, as filesystems such as hugetlb, which use anonymous
> memory and might otherwise get KSM'd, set VM_HUGETLB in the f_op->mmap()
> hook.
> 
> Correct this by checking at the appropriate time.
> 
> Reported-by: syzbot+a74a028d848147bc5931@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/all/6853fc57.a00a0220.137b3.0009.GAE@google.com/
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

I've checked the version in mm tree, LGTM.

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
On Mon, Jun 23, 2025 at 10:37:53AM +0200, Vlastimil Babka wrote:
> On 6/20/25 14:48, Lorenzo Stoakes wrote:
> > Hi Andrew,
> >
> > Sending a fix-patch for this commit due to a reported syzbot issue which
> > highlighted a bug in the implementation.
> >
> > I discuss the syzbot report at [0].
> >
> > [0]: https://lore.kernel.org/all/a55beb72-4288-4356-9642-76ab35a2a07c@lucifer.local/
> >
> > There's a very minor conflict around the map->vm_flags vs. map->flags change,
> > easily resolvable, but if you need a respin let me know.
> >
> > I ran through all mm self tests included the newly introduced one in 4/4 and all
> > good.
> >
> > Thanks, Lorenzo
> >
> > ----8<----
> > From 4d9dde3013837595d733b5059c2d6474261654d6 Mon Sep 17 00:00:00 2001
> > From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > Date: Fri, 20 Jun 2025 13:21:03 +0100
> > Subject: [PATCH] mm/vma: correctly invoke late KSM check after mmap hook
> >
> > Previously we erroneously checked whether KSM was applicable prior to
> > invoking the f_op->mmap() hook in the case of not being able to perform
> > this check early.
> >
> > This is problematic, as filesystems such as hugetlb, which use anonymous
> > memory and might otherwise get KSM'd, set VM_HUGETLB in the f_op->mmap()
> > hook.
> >
> > Correct this by checking at the appropriate time.
> >
> > Reported-by: syzbot+a74a028d848147bc5931@syzkaller.appspotmail.com
> > Closes: https://lore.kernel.org/all/6853fc57.a00a0220.137b3.0009.GAE@google.com/
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> I've checked the version in mm tree, LGTM.
>
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
>

Cheers, appreciated!
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Andrew Morton 7 months, 3 weeks ago
On Fri, 20 Jun 2025 13:48:09 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:

> Hi Andrew,
> 
> Sending a fix-patch for this commit due to a reported syzbot issue which
> highlighted a bug in the implementation.
> 
> I discuss the syzbot report at [0].
> 
> [0]: https://lore.kernel.org/all/a55beb72-4288-4356-9642-76ab35a2a07c@lucifer.local/
> 
> There's a very minor conflict around the map->vm_flags vs. map->flags change,
> easily resolvable, but if you need a respin let me know.

I actually saw 4 conflicts, fixed various things up and...

> @@ -2487,6 +2496,11 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
>  	if (error)
>  		goto free_iter_vma;
> 
> +	if (!map->check_ksm_early) {
> +		update_ksm_flags(map);
> +		vm_flags_init(vma, map->vm_flags);
> +	}
> +

Guessing map->flags was intended here, I made that change then unmade
it in the later mm-update-core-kernel-code-to-use-vm_flags_t-consistently.patch.

I'll do a full rebuild at a couple of bisection points, please check
that all landed OK.
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Lorenzo Stoakes 7 months, 3 weeks ago
On Sun, Jun 22, 2025 at 12:39:31PM -0700, Andrew Morton wrote:
> On Fri, 20 Jun 2025 13:48:09 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:
>
> > Hi Andrew,
> >
> > Sending a fix-patch for this commit due to a reported syzbot issue which
> > highlighted a bug in the implementation.
> >
> > I discuss the syzbot report at [0].
> >
> > [0]: https://lore.kernel.org/all/a55beb72-4288-4356-9642-76ab35a2a07c@lucifer.local/
> >
> > There's a very minor conflict around the map->vm_flags vs. map->flags change,
> > easily resolvable, but if you need a respin let me know.
>
> I actually saw 4 conflicts, fixed various things up and...
>
> > @@ -2487,6 +2496,11 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
> >  	if (error)
> >  		goto free_iter_vma;
> >
> > +	if (!map->check_ksm_early) {
> > +		update_ksm_flags(map);
> > +		vm_flags_init(vma, map->vm_flags);
> > +	}
> > +
>
> Guessing map->flags was intended here, I made that change then unmade
> it in the later mm-update-core-kernel-code-to-use-vm_flags_t-consistently.patch.
>
> I'll do a full rebuild at a couple of bisection points, please check
> that all landed OK.
>

Thanks, appreciate it, apologies for the inconvenience! It all looks good to me
from my side.

Cheers, Lorenzo
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by xu.xin16@zte.com.cn 8 months, 2 weeks ago
> If a user wishes to enable KSM mergeability for an entire process and all
> fork/exec'd processes that come after it, they use the prctl()
> PR_SET_MEMORY_MERGE operation.
> 
> This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set
> (in order to indicate they are KSM mergeable), as well as setting this flag
> for all existing VMAs and propagating this across fork/exec.
> 
> However it also breaks VMA merging for new VMAs, both in the process and
> all forked (and fork/exec'd) child processes.
> 
> This is because when a new mapping is proposed, the flags specified will
> never have VM_MERGEABLE set. However all adjacent VMAs will already have
> VM_MERGEABLE set, rendering VMAs unmergeable by default.
> 
> To work around this, we try to set the VM_MERGEABLE flag prior to
> attempting a merge. In the case of brk() this can always be done.
> 
> However on mmap() things are more complicated - while KSM is not supported
> for MAP_SHARED file-backed mappings, it is supported for MAP_PRIVATE
> file-backed mappings.
> 
> These mappings may have deprecated .mmap() callbacks specified which could,
> in theory, adjust flags and thus KSM eligibility.
> 
> So we check to determine whether this is possible. If not, we set
> VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the
> previous behaviour.
> 
> This fixes VMA merging for all new anonymous mappings, which covers the
> majority of real-world cases, so we should see a significant improvement in
> VMA mergeability.
> 
> For MAP_PRIVATE file-backed mappings, those which implement the
> .mmap_prepare() hook and shmem are both known to be safe, so we allow
> these, disallowing all other cases.
> 
> Also add stubs for newly introduced function invocations to VMA userland
> testing.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process") # please no backport!
> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
> Acked-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>


Thanks, everything looks clearer to me.
Reviewed-by: Xu Xin <xu.xin16@zte.com.cn>





> ---
>  include/linux/ksm.h              |  8 +++---
>  mm/ksm.c                         | 18 ++++++++-----
>  mm/vma.c                         | 44 ++++++++++++++++++++++++++++++--
>  tools/testing/vma/vma_internal.h | 11 ++++++++
>  4 files changed, 70 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index d73095b5cd96..51787f0b0208 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -17,8 +17,8 @@
>  #ifdef CONFIG_KSM
>  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
>  		unsigned long end, int advice, unsigned long *vm_flags);
> -
> -void ksm_add_vma(struct vm_area_struct *vma);
> +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
> +			 vm_flags_t vm_flags);
>  int ksm_enable_merge_any(struct mm_struct *mm);
>  int ksm_disable_merge_any(struct mm_struct *mm);
>  int ksm_disable(struct mm_struct *mm);
> @@ -97,8 +97,10 @@ bool ksm_process_mergeable(struct mm_struct *mm);
>  
>  #else  /* !CONFIG_KSM */
>  
> -static inline void ksm_add_vma(struct vm_area_struct *vma)
> +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
> +		const struct file *file, vm_flags_t vm_flags)
>  {
> +	return vm_flags;
>  }
>  
>  static inline int ksm_disable(struct mm_struct *mm)
> diff --git a/mm/ksm.c b/mm/ksm.c
> index d0c763abd499..18b3690bb69a 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2731,16 +2731,22 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
>  	return 0;
>  }
>  /**
> - * ksm_add_vma - Mark vma as mergeable if compatible
> + * ksm_vma_flags - Update VMA flags to mark as mergeable if compatible
>   *
> - * @vma:  Pointer to vma
> + * @mm:       Proposed VMA's mm_struct
> + * @file:     Proposed VMA's file-backed mapping, if any.
> + * @vm_flags: Proposed VMA"s flags.
> + *
> + * Returns: @vm_flags possibly updated to mark mergeable.
>   */
> -void ksm_add_vma(struct vm_area_struct *vma)
> +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
> +			 vm_flags_t vm_flags)
>  {
> -	struct mm_struct *mm = vma->vm_mm;
> +	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags) &&
> +	    __ksm_should_add_vma(file, vm_flags))
> +		vm_flags |= VM_MERGEABLE;
>  
> -	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
> -		__ksm_add_vma(vma);
> +	return vm_flags;
>  }
>  
>  static void ksm_add_vmas(struct mm_struct *mm)
> diff --git a/mm/vma.c b/mm/vma.c
> index 7ebc9eb608f4..3e351beb82ca 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2490,7 +2490,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
>  	 */
>  	if (!vma_is_anonymous(vma))
>  		khugepaged_enter_vma(vma, map->flags);
> -	ksm_add_vma(vma);
>  	*vmap = vma;
>  	return 0;
>  
> @@ -2593,6 +2592,40 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma,
>  	vma->vm_private_data = map->vm_private_data;
>  }
>  
> +static void update_ksm_flags(struct mmap_state *map)
> +{
> +	map->flags = ksm_vma_flags(map->mm, map->file, map->flags);
> +}
> +
> +/*
> + * Are we guaranteed no driver can change state such as to preclude KSM merging?
> + * If so, let's set the KSM mergeable flag early so we don't break VMA merging.
> + */
> +static bool can_set_ksm_flags_early(struct mmap_state *map)
> +{
> +	struct file *file = map->file;
> +
> +	/* Anonymous mappings have no driver which can change them. */
> +	if (!file)
> +		return true;
> +
> +	/*
> +	 * If .mmap_prepare() is specified, then the driver will have already
> +	 * manipulated state prior to updating KSM flags. So no need to worry
> +	 * about mmap callbacks modifying VMA flags after the KSM flag has been
> +	 * updated here, which could otherwise affect KSM eligibility.
> +	 */
> +	if (file->f_op->mmap_prepare)
> +		return true;
> +
> +	/* shmem is safe. */
> +	if (shmem_file(file))
> +		return true;
> +
> +	/* Any other .mmap callback is not safe. */
> +	return false;
> +}
> +
>  static unsigned long __mmap_region(struct file *file, unsigned long addr,
>  		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
>  		struct list_head *uf)
> @@ -2603,6 +2636,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>  	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
>  	VMA_ITERATOR(vmi, mm, addr);
>  	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
> +	bool check_ksm_early = can_set_ksm_flags_early(&map);
>  
>  	error = __mmap_prepare(&map, uf);
>  	if (!error && have_mmap_prepare)
> @@ -2610,6 +2644,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>  	if (error)
>  		goto abort_munmap;
>  
> +	if (check_ksm_early)
> +		update_ksm_flags(&map);
> +
>  	/* Attempt to merge with adjacent VMAs... */
>  	if (map.prev || map.next) {
>  		VMG_MMAP_STATE(vmg, &map, /* vma = */ NULL);
> @@ -2619,6 +2656,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>  
>  	/* ...but if we can't, allocate a new VMA. */
>  	if (!vma) {
> +		if (!check_ksm_early)
> +			update_ksm_flags(&map);
> +
>  		error = __mmap_new_vma(&map, &vma);
>  		if (error)
>  			goto unacct_error;
> @@ -2721,6 +2761,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  	 * Note: This happens *after* clearing old mappings in some code paths.
>  	 */
>  	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
> +	flags = ksm_vma_flags(mm, NULL, flags);
>  	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
>  		return -ENOMEM;
>  
> @@ -2764,7 +2805,6 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  
>  	mm->map_count++;
>  	validate_mm(mm);
> -	ksm_add_vma(vma);
>  out:
>  	perf_event_mmap(vma);
>  	mm->total_vm += len >> PAGE_SHIFT;
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 4505b1c31be1..77b2949d874a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -1468,4 +1468,15 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
>  	(void)vma;
>  }
>  
> +static inline bool shmem_file(struct file *)
> +{
> +	return false;
> +}
> +
> +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *,
> +			 vm_flags_t vm_flags)
> +{
> +	return vm_flags;
> +}
> +
>  #endif	/* __MM_VMA_INTERNAL_H */
> -- 
> 2.49.0
Re: [PATCH v3 3/4] mm: prevent KSM from breaking VMA merging for new VMAs
Posted by Vlastimil Babka 8 months, 2 weeks ago
On 5/29/25 19:15, Lorenzo Stoakes wrote:
> If a user wishes to enable KSM mergeability for an entire process and all
> fork/exec'd processes that come after it, they use the prctl()
> PR_SET_MEMORY_MERGE operation.
> 
> This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set
> (in order to indicate they are KSM mergeable), as well as setting this flag
> for all existing VMAs and propagating this across fork/exec.
> 
> However it also breaks VMA merging for new VMAs, both in the process and
> all forked (and fork/exec'd) child processes.
> 
> This is because when a new mapping is proposed, the flags specified will
> never have VM_MERGEABLE set. However all adjacent VMAs will already have
> VM_MERGEABLE set, rendering VMAs unmergeable by default.
> 
> To work around this, we try to set the VM_MERGEABLE flag prior to
> attempting a merge. In the case of brk() this can always be done.
> 
> However on mmap() things are more complicated - while KSM is not supported
> for MAP_SHARED file-backed mappings, it is supported for MAP_PRIVATE
> file-backed mappings.
> 
> These mappings may have deprecated .mmap() callbacks specified which could,
> in theory, adjust flags and thus KSM eligibility.
> 
> So we check to determine whether this is possible. If not, we set
> VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the
> previous behaviour.
> 
> This fixes VMA merging for all new anonymous mappings, which covers the
> majority of real-world cases, so we should see a significant improvement in
> VMA mergeability.
> 
> For MAP_PRIVATE file-backed mappings, those which implement the
> .mmap_prepare() hook and shmem are both known to be safe, so we allow
> these, disallowing all other cases.
> 
> Also add stubs for newly introduced function invocations to VMA userland
> testing.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process") # please no backport!
> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
> Acked-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

The commit log is much clearer to me now, thanks :)

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>