Building on the vma_count helpers, add a VM_WARN_ON_ONCE() to detect
cases where the VMA count exceeds the sysctl_max_map_count limit.
This check will help catch future bugs or regressions where
VMAs are allocated in excess of the limit.
The warning is placed in the main vma_count_*() helpers, while the
internal *_nocheck variants bypass it. _nocheck helpers are used to
ensure that the assertion does not trigger a false positive in
the legitimate case of a temporary VMA increase past the limit
by a VMA split in munmap().
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Pedro Falcato <pfalcato@suse.de>
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
---
Changes in v2:
- Add assertions if exceeding max_vma_count limit, per Pedro
include/linux/mm.h | 12 ++++++--
mm/internal.h | 1 -
mm/vma.c | 49 +++++++++++++++++++++++++-------
tools/testing/vma/vma_internal.h | 7 ++++-
4 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bad1454984c..3a3749d7015c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4219,19 +4219,27 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
void snapshot_page(struct page_snapshot *ps, const struct page *page);
+int vma_count_remaining(const struct mm_struct *mm);
+
static inline void vma_count_init(struct mm_struct *mm)
{
ACCESS_PRIVATE(mm, __vma_count) = 0;
}
-static inline void vma_count_add(struct mm_struct *mm, int nr_vmas)
+static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas)
{
ACCESS_PRIVATE(mm, __vma_count) += nr_vmas;
}
+static inline void vma_count_add(struct mm_struct *mm, int nr_vmas)
+{
+ VM_WARN_ON_ONCE(!vma_count_remaining(mm));
+ __vma_count_add_nocheck(mm, nr_vmas);
+}
+
static inline void vma_count_sub(struct mm_struct *mm, int nr_vmas)
{
- vma_count_add(mm, -nr_vmas);
+ __vma_count_add_nocheck(mm, -nr_vmas);
}
static inline void vma_count_inc(struct mm_struct *mm)
diff --git a/mm/internal.h b/mm/internal.h
index 39f1c9535ae5..e0567a3b64fa 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1661,6 +1661,5 @@ static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end,
void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm);
int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
-int vma_count_remaining(const struct mm_struct *mm);
#endif /* __MM_INTERNAL_H */
diff --git a/mm/vma.c b/mm/vma.c
index 0cd3cb472220..0e4fcaebe209 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -323,15 +323,17 @@ static void vma_prepare(struct vma_prepare *vp)
}
/*
- * vma_complete- Helper function for handling the unlocking after altering VMAs,
- * or for inserting a VMA.
+ * This is the internal, unsafe version of vma_complete(). Unlike its
+ * wrapper, this function bypasses runtime checks for VMA count limits by
+ * using the _nocheck vma_count* helpers.
*
- * @vp: The vma_prepare struct
- * @vmi: The vma iterator
- * @mm: The mm_struct
+ * Its use is restricted to __split_vma() where the VMA count can be
+ * temporarily higher than the sysctl_max_map_count limit.
+ *
+ * All other callers must use vma_complete().
*/
-static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
- struct mm_struct *mm)
+static void __vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
+ struct mm_struct *mm)
{
if (vp->file) {
if (vp->adj_next)
@@ -352,7 +354,11 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
* (it may either follow vma or precede it).
*/
vma_iter_store_new(vmi, vp->insert);
- vma_count_inc(mm);
+ /*
+ * Explicitly allow vma_count to exceed the threshold to avoid
+ * blocking munmap() from freeing resources.
+ */
+ __vma_count_add_nocheck(mm, 1);
}
if (vp->anon_vma) {
@@ -403,6 +409,26 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
uprobe_mmap(vp->insert);
}
+/*
+ * vma_complete- Helper function for handling the unlocking after altering VMAs,
+ * or for inserting a VMA.
+ *
+ * @vp: The vma_prepare struct
+ * @vmi: The vma iterator
+ * @mm: The mm_struct
+ */
+static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
+ struct mm_struct *mm)
+{
+ /*
+ * __vma_complete() explicitly foregoes checking the new
+ * vma_count against the sysctl_max_map_count limit, so
+ * do it here.
+ */
+ VM_WARN_ON_ONCE(!vma_count_remaining(mm));
+ __vma_complete(vp, vmi, mm);
+}
+
/*
* init_vma_prep() - Initializer wrapper for vma_prepare struct
* @vp: The vma_prepare struct
@@ -564,8 +590,11 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma->vm_end = addr;
}
- /* vma_complete stores the new vma */
- vma_complete(&vp, vmi, vma->vm_mm);
+ /*
+ * __vma_complete stores the new vma without checking against the
+ * sysctl_max_map_count (vma_count) limit.
+ */
+ __vma_complete(&vp, vmi, vma->vm_mm);
validate_mm(vma->vm_mm);
/* Success. */
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 6e724ba1adf4..d084b1eb2a5c 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -1534,11 +1534,16 @@ static inline void vma_count_init(struct mm_struct *mm)
mm->__vma_count = 0;
}
-static inline void vma_count_add(struct mm_struct *mm, int nr_vmas)
+static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas)
{
mm->__vma_count += nr_vmas;
}
+static inline void vma_count_add(struct mm_struct *mm, int nr_vmas)
+{
+ __vma_count_add_nocheck(mm, nr_vmas);
+}
+
static inline void vma_count_sub(struct mm_struct *mm, int nr_vmas)
{
vma_count_add(mm, -nr_vmas);
--
2.51.0.384.g4c02a37b29-goog
I see that the review is generally to drop this one so it's moot, but this is also not applying: Patch failed at 0006 mm: add assertion for VMA count limit hint: Use 'git am --show-current-patch=diff' to see the failed patch hint: When you have resolved this problem, run "git am --continue". hint: If you prefer to skip this patch, run "git am --skip" instead. hint: To restore the original branch and stop patching, run "git am --abort". hint: Disable this message with "git config set advice.mergeConflict false" When trying to b4 shazam in mm-new.
On 15.09.25 18:36, Kalesh Singh wrote: > Building on the vma_count helpers, add a VM_WARN_ON_ONCE() to detect > cases where the VMA count exceeds the sysctl_max_map_count limit. > > This check will help catch future bugs or regressions where > the VMAs are allocated exceeding the limit. > > The warning is placed in the main vma_count_*() helpers, while the > internal *_nocheck variants bypass it. _nocheck helpers are used to > ensure that the assertion does not trigger a false positive in > the legitimate case of a temporary VMA increase past the limit > by a VMA split in munmap(). > > Cc: Andrew Morton <akpm@linux-foundation.org> > Cc: David Hildenbrand <david@redhat.com> > Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> > Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > Cc: Mike Rapoport <rppt@kernel.org> > Cc: Minchan Kim <minchan@kernel.org> > Cc: Pedro Falcato <pfalcato@suse.de> > Signed-off-by: Kalesh Singh <kaleshsingh@google.com> > --- > > Changes in v2: > - Add assertions if exceeding max_vma_count limit, per Pedro > > include/linux/mm.h | 12 ++++++-- > mm/internal.h | 1 - > mm/vma.c | 49 +++++++++++++++++++++++++------- > tools/testing/vma/vma_internal.h | 7 ++++- > 4 files changed, 55 insertions(+), 14 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 8bad1454984c..3a3749d7015c 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -4219,19 +4219,27 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) > > void snapshot_page(struct page_snapshot *ps, const struct page *page); > > +int vma_count_remaining(const struct mm_struct *mm); > + > static inline void vma_count_init(struct mm_struct *mm) > { > ACCESS_PRIVATE(mm, __vma_count) = 0; > } > > -static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) > +static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas) > { > ACCESS_PRIVATE(mm, __vma_count) += nr_vmas; > } > > +static inline void vma_count_add(struct 
mm_struct *mm, int nr_vmas) > +{ > + VM_WARN_ON_ONCE(!vma_count_remaining(mm)); Can't that fire when changing the max count from user space at just the wrong time? I assume we'll have to tolerate that and might just want to drop this patch from the series. -- Cheers David / dhildenb
On Wed, Sep 17, 2025 at 03:44:27PM +0200, David Hildenbrand wrote: > On 15.09.25 18:36, Kalesh Singh wrote: > > Building on the vma_count helpers, add a VM_WARN_ON_ONCE() to detect > > cases where the VMA count exceeds the sysctl_max_map_count limit. > > > > This check will help catch future bugs or regressions where > > the VMAs are allocated exceeding the limit. > > > > The warning is placed in the main vma_count_*() helpers, while the > > internal *_nocheck variants bypass it. _nocheck helpers are used to > > ensure that the assertion does not trigger a false positive in > > the legitimate case of a temporary VMA increase past the limit > > by a VMA split in munmap(). > > > > Cc: Andrew Morton <akpm@linux-foundation.org> > > Cc: David Hildenbrand <david@redhat.com> > > Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> > > Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > Cc: Mike Rapoport <rppt@kernel.org> > > Cc: Minchan Kim <minchan@kernel.org> > > Cc: Pedro Falcato <pfalcato@suse.de> > > Signed-off-by: Kalesh Singh <kaleshsingh@google.com> > > --- > > > > Changes in v2: > > - Add assertions if exceeding max_vma_count limit, per Pedro > > > > include/linux/mm.h | 12 ++++++-- > > mm/internal.h | 1 - > > mm/vma.c | 49 +++++++++++++++++++++++++------- > > tools/testing/vma/vma_internal.h | 7 ++++- > > 4 files changed, 55 insertions(+), 14 deletions(-) > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 8bad1454984c..3a3749d7015c 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -4219,19 +4219,27 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) > > void snapshot_page(struct page_snapshot *ps, const struct page *page); > > +int vma_count_remaining(const struct mm_struct *mm); > > + > > static inline void vma_count_init(struct mm_struct *mm) > > { > > ACCESS_PRIVATE(mm, __vma_count) = 0; > > } > > -static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) > > +static inline void 
__vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas) > > { > > ACCESS_PRIVATE(mm, __vma_count) += nr_vmas; > > } > > +static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) > > +{ > > + VM_WARN_ON_ONCE(!vma_count_remaining(mm)); > > Can't that fire when changing the max count from user space at just the > wrong time? > > I assume we'll have to tolerated that and might just want to drop this patch > from the series. Ah yes, of course, userspace can dynamically change it. Good catch. I guess we'll need to kill the assertion idea then. -- Pedro
On Wed, Sep 17, 2025 at 6:44 AM David Hildenbrand <david@redhat.com> wrote: > > On 15.09.25 18:36, Kalesh Singh wrote: > > Building on the vma_count helpers, add a VM_WARN_ON_ONCE() to detect > > cases where the VMA count exceeds the sysctl_max_map_count limit. > > > > This check will help catch future bugs or regressions where > > the VMAs are allocated exceeding the limit. > > > > The warning is placed in the main vma_count_*() helpers, while the > > internal *_nocheck variants bypass it. _nocheck helpers are used to > > ensure that the assertion does not trigger a false positive in > > the legitimate case of a temporary VMA increase past the limit > > by a VMA split in munmap(). > > > > Cc: Andrew Morton <akpm@linux-foundation.org> > > Cc: David Hildenbrand <david@redhat.com> > > Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> > > Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > Cc: Mike Rapoport <rppt@kernel.org> > > Cc: Minchan Kim <minchan@kernel.org> > > Cc: Pedro Falcato <pfalcato@suse.de> > > Signed-off-by: Kalesh Singh <kaleshsingh@google.com> > > --- > > > > Changes in v2: > > - Add assertions if exceeding max_vma_count limit, per Pedro > > > > include/linux/mm.h | 12 ++++++-- > > mm/internal.h | 1 - > > mm/vma.c | 49 +++++++++++++++++++++++++------- > > tools/testing/vma/vma_internal.h | 7 ++++- > > 4 files changed, 55 insertions(+), 14 deletions(-) > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index 8bad1454984c..3a3749d7015c 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -4219,19 +4219,27 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps) > > > > void snapshot_page(struct page_snapshot *ps, const struct page *page); > > > > +int vma_count_remaining(const struct mm_struct *mm); > > + > > static inline void vma_count_init(struct mm_struct *mm) > > { > > ACCESS_PRIVATE(mm, __vma_count) = 0; > > } > > > > -static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) > > 
+static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas) > > { > > ACCESS_PRIVATE(mm, __vma_count) += nr_vmas; > > } > > > > +static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) > > +{ > > + VM_WARN_ON_ONCE(!vma_count_remaining(mm)); > > Can't that fire when changing the max count from user space at just the > wrong time? You are right: technically it's possible if it was raised between the time of checking and when the new VMA is added. > > I assume we'll have to tolerated that and might just want to drop this > patch from the series. > It is compiled out in !CONFIG_VM_DEBUG builds, would we still want to drop it? Thanks, Kalesh > -- > Cheers > > David / dhildenb >
>> Can't that fire when changing the max count from user space at just the >> wrong time? > > You are right: technically it's possible if it was raised between the > time of checking and when the new VMA is added. > >> >> I assume we'll have to tolerated that and might just want to drop this >> patch from the series. >> > > It is compiled out in !CONFIG_VM_DEBUG builds, would we still want to drop it? Due to the racy nature I think any kinds of assertions would be wrong. Without any such races possible I would agree that the checks would be nice to have. -- Cheers David / dhildenb
On Wed, Sep 17, 2025 at 11:34 AM David Hildenbrand <david@redhat.com> wrote: > > >> Can't that fire when changing the max count from user space at just the > >> wrong time? > > > > You are right: technically it's possible if it was raised between the > > time of checking and when the new VMA is added. > > > >> > >> I assume we'll have to tolerated that and might just want to drop this > >> patch from the series. > >> > > > > It is compiled out in !CONFIG_VM_DEBUG builds, would we still want to drop it? > > Due to the racy nature I think any kinds of assertions would be wrong. > Without any such races possible I would agree that the checks would be > nice to have. Alright I'll drop this in the next revision. Thanks, Kalesh > > -- > Cheers > > David / dhildenb >
© 2016 - 2025 Red Hat, Inc.