mm/internal.h | 17 ++++++++++++++ mm/rmap.c | 64 +++++++++++++++++++++++++++++---------------------- 2 files changed, 53 insertions(+), 28 deletions(-)
If the anon_vma_alloc() is called, the num_children of the parent of
the anon_vma will be updated. But this operation occurs outside of
anon_vma_alloc(). There are two callers, one has itself as its parent,
while another has a real parent. That means they have the same logic.
The update of num_active_vmas and vma->anon_vma are not performed
together. These operations should be performed under a function.
Add an __anon_vma_alloc() function that implements anon_vma_alloc().
If the caller has a real parent, called __anon_vma_alloc() and pass
the parent to it. If it not, called anon_vma_alloc() directly. It will
set the parent and root of the anon_vma and also updates the num_children
of its parent anon_vma.
Introduce vma_attach_anon() and vma_detach_anon() to update
num_active_vmas with vma->anon_vma together.
Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
---
v2: fix a WARNING in unlink_anon_vmas and optimize the code
v1: https://lore.kernel.org/all/20250905132019.18915-1-yajun.deng@linux.dev/
---
mm/internal.h | 17 ++++++++++++++
mm/rmap.c | 64 +++++++++++++++++++++++++++++----------------------
2 files changed, 53 insertions(+), 28 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 9b0129531d00..12bc71bb2304 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -953,6 +953,23 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
return list_empty(&area->free_list[migratetype]);
}
+static inline void vma_attach_anon(struct vm_area_struct *vma,
+ struct anon_vma *anon_vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+ lockdep_assert_held_write(&anon_vma->root->rwsem);
+ vma->anon_vma = anon_vma;
+ vma->anon_vma->num_active_vmas++;
+}
+
+static inline void vma_detach_anon(struct vm_area_struct *vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+ lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
+ vma->anon_vma->num_active_vmas--;
+ vma->anon_vma = NULL;
+}
+
/* mm/util.c */
struct anon_vma *folio_anon_vma(const struct folio *folio);
diff --git a/mm/rmap.c b/mm/rmap.c
index 34333ae3bd80..de557707c34a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -86,15 +86,25 @@
static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;
-static inline struct anon_vma *anon_vma_alloc(void)
+static inline struct anon_vma *__anon_vma_alloc(struct anon_vma *parent)
{
struct anon_vma *anon_vma;
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
- if (anon_vma) {
- atomic_set(&anon_vma->refcount, 1);
- anon_vma->num_children = 0;
- anon_vma->num_active_vmas = 0;
+ if (!anon_vma)
+ return NULL;
+
+ atomic_set(&anon_vma->refcount, 1);
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
+ if (parent) {
+ /*
+ * The root anon_vma's rwsem is the lock actually used when we
+ * lock any of the anon_vmas in this anon_vma tree.
+ */
+ anon_vma->parent = parent;
+ anon_vma->root = parent->root;
+ } else {
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -102,10 +112,18 @@ static inline struct anon_vma *anon_vma_alloc(void)
*/
anon_vma->root = anon_vma;
}
+ anon_vma_lock_write(anon_vma);
+ anon_vma->parent->num_children++;
+ anon_vma_unlock_write(anon_vma);
return anon_vma;
}
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+ return __anon_vma_alloc(NULL);
+}
+
static inline void anon_vma_free(struct anon_vma *anon_vma)
{
VM_BUG_ON(atomic_read(&anon_vma->refcount));
@@ -201,7 +219,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
- anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -209,9 +226,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
/* page_table_lock to protect against threads */
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
- vma->anon_vma = anon_vma;
+ vma_attach_anon(vma, anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -355,38 +371,31 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
if (vma->anon_vma)
return 0;
- /* Then add our own anon_vma. */
- anon_vma = anon_vma_alloc();
- if (!anon_vma)
- goto out_error;
- anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
- goto out_error_free_anon_vma;
+ goto out_error;
+
+ /* Then add our own anon_vma. */
+ anon_vma = __anon_vma_alloc(pvma->anon_vma);
+ if (!anon_vma)
+ goto out_error_free_avc;
- /*
- * The root anon_vma's rwsem is the lock actually used when we
- * lock any of the anon_vmas in this anon_vma tree.
- */
- anon_vma->root = pvma->anon_vma->root;
- anon_vma->parent = pvma->anon_vma;
/*
* With refcounts, an anon_vma can stay around longer than the
* process it belongs to. The root anon_vma needs to be pinned until
* this anon_vma is freed, because the lock lives in the root.
*/
get_anon_vma(anon_vma->root);
- /* Mark this anon_vma as the one where our new (COWed) pages go. */
- vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
+ /* Mark this anon_vma as the one where our new (COWed) pages go. */
+ vma_attach_anon(vma, anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
- out_error_free_anon_vma:
- put_anon_vma(anon_vma);
+ out_error_free_avc:
+ anon_vma_chain_free(avc);
out_error:
unlink_anon_vmas(vma);
return -ENOMEM;
@@ -420,14 +429,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
if (vma->anon_vma) {
- vma->anon_vma->num_active_vmas--;
-
/*
* vma would still be needed after unlink, and anon_vma will be prepared
* when handle fault.
*/
- vma->anon_vma = NULL;
+ vma_detach_anon(vma);
}
+
unlock_anon_vma_root(root);
/*
--
2.25.1
* Yajun Deng <yajun.deng@linux.dev> [250908 10:06]: > If the anon_vma_alloc() is called, the num_children of the parent of > the anon_vma will be updated. But this operation occurs outside of > anon_vma_alloc(). There are two callers, one has itself as its parent, > while another has a real parent. That means they have the same logic. > > The update of num_active_vmas and vma->anon_vma are not performed > together. These operations should be performed under a function. > > Add an __anon_vma_alloc() function that implements anon_vma_alloc(). > If the caller has a real parent, called __anon_vma_alloc() and pass > the parent to it. If it not, called anon_vma_alloc() directly. It will > set the parent and root of the anon_vma and also updates the num_children > of its parent anon_vma. > > Introduce vma_attach_anon() and vma_detach_anon() to update > num_active_vmas with vma->anon_vma together. > > Signed-off-by: Yajun Deng <yajun.deng@linux.dev> > --- > v2: fix a WARNING in unlink_anon_vmas and optimize the code Fixed all the bugs I found in v1? Tested this patch this time? What about the parent fields without the lock? > v1: https://lore.kernel.org/all/20250905132019.18915-1-yajun.deng@linux.dev/ > --- > mm/internal.h | 17 ++++++++++++++ > mm/rmap.c | 64 +++++++++++++++++++++++++++++---------------------- > 2 files changed, 53 insertions(+), 28 deletions(-) > > diff --git a/mm/internal.h b/mm/internal.h > index 9b0129531d00..12bc71bb2304 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -953,6 +953,23 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) > return list_empty(&area->free_list[migratetype]); > } > > +static inline void vma_attach_anon(struct vm_area_struct *vma, > + struct anon_vma *anon_vma) Function names are still confusing. > +{ > + mmap_assert_locked(vma->vm_mm); > + lockdep_assert_held_write(&anon_vma->root->rwsem); > + vma->anon_vma = anon_vma; > + vma->anon_vma->num_active_vmas++; > +} > + > +static inline void vma_detach_anon(struct vm_area_struct *vma) > +{ > + mmap_assert_locked(vma->vm_mm); > + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); > + vma->anon_vma->num_active_vmas--; > + vma->anon_vma = NULL; > +} > + > /* mm/util.c */ > struct anon_vma *folio_anon_vma(const struct folio *folio); > > diff --git a/mm/rmap.c b/mm/rmap.c > index 34333ae3bd80..de557707c34a 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -86,15 +86,25 @@ > static struct kmem_cache *anon_vma_cachep; > static struct kmem_cache *anon_vma_chain_cachep; > > -static inline struct anon_vma *anon_vma_alloc(void) > +static inline struct anon_vma *__anon_vma_alloc(struct anon_vma *parent) > { > struct anon_vma *anon_vma; > > anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); > - if (anon_vma) { > - atomic_set(&anon_vma->refcount, 1); > - anon_vma->num_children = 0; > - anon_vma->num_active_vmas = 0; > + if (!anon_vma) > + return NULL; > + > + atomic_set(&anon_vma->refcount, 1); > + anon_vma->num_children = 0; > + anon_vma->num_active_vmas = 0; > + if (parent) { > + /* > + * The root anon_vma's rwsem is the lock actually used when we > + * lock any of the anon_vmas in this anon_vma tree. > + */ > + anon_vma->parent = parent; > + anon_vma->root = parent->root; > + } else { None of this is worth doing in this function. A wrapper only makes the code harder to follow. > anon_vma->parent = anon_vma; > /* > * Initialise the anon_vma root to point to itself. If called > @@ -102,10 +112,18 @@ static inline struct anon_vma *anon_vma_alloc(void) > */ > anon_vma->root = anon_vma; > } > + anon_vma_lock_write(anon_vma); > + anon_vma->parent->num_children++; > + anon_vma_unlock_write(anon_vma); > > return anon_vma; > } > > +static inline struct anon_vma *anon_vma_alloc(void) > +{ > + return __anon_vma_alloc(NULL); > +} > + > static inline void anon_vma_free(struct anon_vma *anon_vma) > { > VM_BUG_ON(atomic_read(&anon_vma->refcount)); > @@ -201,7 +219,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma) > anon_vma = anon_vma_alloc(); > if (unlikely(!anon_vma)) > goto out_enomem_free_avc; > - anon_vma->num_children++; /* self-parent link for new root */ > allocated = anon_vma; > } > > @@ -209,9 +226,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma) > /* page_table_lock to protect against threads */ > spin_lock(&mm->page_table_lock); > if (likely(!vma->anon_vma)) { > - vma->anon_vma = anon_vma; > + vma_attach_anon(vma, anon_vma); > anon_vma_chain_link(vma, avc, anon_vma); > - anon_vma->num_active_vmas++; > allocated = NULL; > avc = NULL; > } > @@ -355,38 +371,31 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) > if (vma->anon_vma) > return 0; > > - /* Then add our own anon_vma. */ > - anon_vma = anon_vma_alloc(); > - if (!anon_vma) > - goto out_error; > - anon_vma->num_active_vmas++; > avc = anon_vma_chain_alloc(GFP_KERNEL); > if (!avc) > - goto out_error_free_anon_vma; > + goto out_error; > + > + /* Then add our own anon_vma. */ > + anon_vma = __anon_vma_alloc(pvma->anon_vma); > + if (!anon_vma) > + goto out_error_free_avc; > > - /* > - * The root anon_vma's rwsem is the lock actually used when we > - * lock any of the anon_vmas in this anon_vma tree. > - */ > - anon_vma->root = pvma->anon_vma->root; > - anon_vma->parent = pvma->anon_vma; > /* > * With refcounts, an anon_vma can stay around longer than the > * process it belongs to. The root anon_vma needs to be pinned until > * this anon_vma is freed, because the lock lives in the root. > */ > get_anon_vma(anon_vma->root); > - /* Mark this anon_vma as the one where our new (COWed) pages go. */ > - vma->anon_vma = anon_vma; > anon_vma_lock_write(anon_vma); > + /* Mark this anon_vma as the one where our new (COWed) pages go. */ > + vma_attach_anon(vma, anon_vma); > anon_vma_chain_link(vma, avc, anon_vma); > - anon_vma->parent->num_children++; > anon_vma_unlock_write(anon_vma); > > return 0; > > - out_error_free_anon_vma: > - put_anon_vma(anon_vma); > + out_error_free_avc: > + anon_vma_chain_free(avc); > out_error: > unlink_anon_vmas(vma); > return -ENOMEM; Your plan was to reduce the complexity of the code by isolating duplicate code in a function. This hasn't panned out in this case as the code is much harder to follow. For instance, the parent logic was spelled out before while now it's in __anon_vma_alloc(), which seems unexpected. It is being initialized in the alloc function.. You had to add two functions for the two call sites, which is a hint things are not going to work out here. Reversing the logic above resulted in a different but similar undo steps while hiding what has happened with the anon_vma->parent. This isn't simplifying anything so I think you should move on. > @@ -420,14 +429,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma) > anon_vma_chain_free(avc); > } > if (vma->anon_vma) { > - vma->anon_vma->num_active_vmas--; > - > /* > * vma would still be needed after unlink, and anon_vma will be prepared > * when handle fault. > */ > - vma->anon_vma = NULL; > + vma_detach_anon(vma); > } > + > unlock_anon_vma_root(root); > > /* > -- > 2.25.1 > >
On Mon, Sep 08, 2025 at 02:05:04PM +0000, Yajun Deng wrote: > If the anon_vma_alloc() is called, the num_children of the parent of > the anon_vma will be updated. But this operation occurs outside of > anon_vma_alloc(). There are two callers, one has itself as its parent, > while another has a real parent. That means they have the same logic. No they do not. As I explained to you at length. > > The update of num_active_vmas and vma->anon_vma are not performed > together. These operations should be performed under a function. > > Add an __anon_vma_alloc() function that implements anon_vma_alloc(). > If the caller has a real parent, called __anon_vma_alloc() and pass > the parent to it. If it not, called anon_vma_alloc() directly. It will > set the parent and root of the anon_vma and also updates the num_children > of its parent anon_vma. Doing exactly what I told you not to do... > > Introduce vma_attach_anon() and vma_detach_anon() to update > num_active_vmas with vma->anon_vma together. > > Signed-off-by: Yajun Deng <yajun.deng@linux.dev> NAK. There's so much wrong with this and you've ignored review Liam and I have spent time giving you (a resource which I have very little of), it's simply not a good use of my time to look at this further. Please abandon this idea, it's not good, and you're not implementing it well. Thanks, Lorenzo
On Mon, Sep 08, 2025 at 02:05:04PM +0000, Yajun Deng wrote: > If the anon_vma_alloc() is called, the num_children of the parent of > the anon_vma will be updated. But this operation occurs outside of > anon_vma_alloc(). There are two callers, one has itself as its parent, > while another has a real parent. That means they have the same logic. No, they don't. This is terrible. Please stop.
© 2016 - 2025 Red Hat, Inc.