[PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap

Lorenzo Stoakes posted 8 patches 1 month ago
There is a newer version of this series
[PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Lorenzo Stoakes 1 month ago
The root anon_vma of all anon_vma's linked to a VMA must by definition be
the same - a VMA and all of its descendants/ancestors must exist in the
same CoW chain.

Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
anon_vma_clone()") introduced paranoid checking of the root anon_vma
remaining the same throughout all AVC's in 2011.

I think 15 years later we can safely assume that this is always the case.

Additionally, since cloning from or unlinking an unfaulted VMA is a no-op,
we can simply lock the anon_vma associated with the VMA up front rather
than doing any specific lock dance while traversing its anon_vma_chain.

This removes unnecessary checks and makes it clear that the root anon_vma
is shared between all anon_vma's in a given VMA's anon_vma_chain.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/rmap.c | 51 +++++++++++++++------------------------------------
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index de2cbe860566..6ac42671bedd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -232,32 +232,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
 	return -ENOMEM;
 }
 
-/*
- * This is a useful helper function for locking the anon_vma root as
- * we traverse the vma->anon_vma_chain, looping over anon_vma's that
- * have the same vma.
- *
- * Such anon_vma's should have the same root, so you'd expect to see
- * just a single mutex_lock for the whole traversal.
- */
-static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
-{
-	struct anon_vma *new_root = anon_vma->root;
-	if (new_root != root) {
-		if (WARN_ON_ONCE(root))
-			up_write(&root->rwsem);
-		root = new_root;
-		down_write(&root->rwsem);
-	}
-	return root;
-}
-
-static inline void unlock_anon_vma_root(struct anon_vma *root)
-{
-	if (root)
-		up_write(&root->rwsem);
-}
-
 static void check_anon_vma_clone(struct vm_area_struct *dst,
 				 struct vm_area_struct *src)
 {
@@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
-	struct anon_vma *root = NULL;
 
 	check_anon_vma_clone(dst, src);
 
 	if (!src->anon_vma)
 		return 0;
 
+	check_anon_vma_clone(dst, src);
+
+	/* All anon_vma's share the same root. */
+	anon_vma_lock_write(src->anon_vma);
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma;
 
 		avc = anon_vma_chain_alloc(GFP_NOWAIT);
 		if (unlikely(!avc)) {
-			unlock_anon_vma_root(root);
-			root = NULL;
+			anon_vma_unlock_write(src->anon_vma);
 			avc = anon_vma_chain_alloc(GFP_KERNEL);
 			if (!avc)
 				goto enomem_failure;
+			anon_vma_lock_write(src->anon_vma);
 		}
 		anon_vma = pavc->anon_vma;
-		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_chain_link(dst, avc, anon_vma);
 
 		/*
@@ -346,7 +322,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	}
 	if (dst->anon_vma)
 		dst->anon_vma->num_active_vmas++;
-	unlock_anon_vma_root(root);
+
+	anon_vma_unlock_write(src->anon_vma);
 	return 0;
 
  enomem_failure:
@@ -471,17 +448,19 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
 void unlink_anon_vmas(struct vm_area_struct *vma)
 {
 	struct anon_vma_chain *avc, *next;
-	struct anon_vma *root = NULL;
+	struct anon_vma *active_anon_vma = vma->anon_vma;
 
 	/* Always hold mmap lock, read-lock on unmap possibly. */
 	mmap_assert_locked(vma->vm_mm);
 
 	/* Unfaulted is a no-op. */
-	if (!vma->anon_vma) {
+	if (!active_anon_vma) {
 		VM_WARN_ON_ONCE(!list_empty(&vma->anon_vma_chain));
 		return;
 	}
 
+	anon_vma_lock_write(active_anon_vma);
+
 	/*
 	 * Unlink each anon_vma chained to the VMA.  This list is ordered
 	 * from newest to oldest, ensuring the root anon_vma gets freed last.
@@ -489,7 +468,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
 
-		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
 
 		/*
@@ -505,13 +483,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 		anon_vma_chain_free(avc);
 	}
 
-	vma->anon_vma->num_active_vmas--;
+	active_anon_vma->num_active_vmas--;
 	/*
 	 * vma would still be needed after unlink, and anon_vma will be prepared
 	 * when handle fault.
 	 */
 	vma->anon_vma = NULL;
-	unlock_anon_vma_root(root);
+	anon_vma_unlock_write(active_anon_vma);
+
 
 	/*
 	 * Iterate the list once more, it now only contains empty and unlinked
-- 
2.52.0
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Suren Baghdasaryan 3 weeks, 4 days ago
On Tue, Jan 6, 2026 at 7:04 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> The root anon_vma of all anon_vma's linked to a VMA must by definition be
> the same - a VMA and all of its descendants/ancestors must exist in the
> same CoW chain.
>
> Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
> anon_vma_clone()") introduced paranoid checking of the root anon_vma
> remaining the same throughout all AVC's in 2011.
>
> I think 15 years later we can safely assume that this is always the case.
>
> Additionally, since unfaulted VMAs being cloned from or unlinked are
> no-op's, we can simply lock the anon_vma's associated with this rather than
> doing any specific dance around this.
>
> This removes unnecessary checks and makes it clear that the root anon_vma
> is shared between all anon_vma's in a given VMA's anon_vma_chain.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
>  mm/rmap.c | 51 +++++++++++++++------------------------------------
>  1 file changed, 15 insertions(+), 36 deletions(-)
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index de2cbe860566..6ac42671bedd 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -232,32 +232,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
>         return -ENOMEM;
>  }
>
> -/*
> - * This is a useful helper function for locking the anon_vma root as
> - * we traverse the vma->anon_vma_chain, looping over anon_vma's that
> - * have the same vma.
> - *
> - * Such anon_vma's should have the same root, so you'd expect to see
> - * just a single mutex_lock for the whole traversal.
> - */
> -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
> -{
> -       struct anon_vma *new_root = anon_vma->root;
> -       if (new_root != root) {
> -               if (WARN_ON_ONCE(root))
> -                       up_write(&root->rwsem);
> -               root = new_root;
> -               down_write(&root->rwsem);
> -       }
> -       return root;
> -}
> -
> -static inline void unlock_anon_vma_root(struct anon_vma *root)
> -{
> -       if (root)
> -               up_write(&root->rwsem);
> -}
> -
>  static void check_anon_vma_clone(struct vm_area_struct *dst,
>                                  struct vm_area_struct *src)
>  {
> @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
>  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
>  {
>         struct anon_vma_chain *avc, *pavc;
> -       struct anon_vma *root = NULL;
>
>         check_anon_vma_clone(dst, src);
>
>         if (!src->anon_vma)
>                 return 0;
>
> +       check_anon_vma_clone(dst, src);

check_anon_vma_clone() was already called 4 lines above. I'm guessing
this was accidentally carried over from the previous version?

> +
> +       /* All anon_vma's share the same root. */
> +       anon_vma_lock_write(src->anon_vma);
>         list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
>                 struct anon_vma *anon_vma;
>
>                 avc = anon_vma_chain_alloc(GFP_NOWAIT);
>                 if (unlikely(!avc)) {
> -                       unlock_anon_vma_root(root);
> -                       root = NULL;
> +                       anon_vma_unlock_write(src->anon_vma);
>                         avc = anon_vma_chain_alloc(GFP_KERNEL);
>                         if (!avc)
>                                 goto enomem_failure;
> +                       anon_vma_lock_write(src->anon_vma);
>                 }
>                 anon_vma = pavc->anon_vma;
> -               root = lock_anon_vma_root(root, anon_vma);
>                 anon_vma_chain_link(dst, avc, anon_vma);
>
>                 /*
> @@ -346,7 +322,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
>         }
>         if (dst->anon_vma)
>                 dst->anon_vma->num_active_vmas++;
> -       unlock_anon_vma_root(root);
> +
> +       anon_vma_unlock_write(src->anon_vma);
>         return 0;
>
>   enomem_failure:
> @@ -471,17 +448,19 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
>  void unlink_anon_vmas(struct vm_area_struct *vma)
>  {
>         struct anon_vma_chain *avc, *next;
> -       struct anon_vma *root = NULL;
> +       struct anon_vma *active_anon_vma = vma->anon_vma;
>
>         /* Always hold mmap lock, read-lock on unmap possibly. */
>         mmap_assert_locked(vma->vm_mm);
>
>         /* Unfaulted is a no-op. */
> -       if (!vma->anon_vma) {
> +       if (!active_anon_vma) {
>                 VM_WARN_ON_ONCE(!list_empty(&vma->anon_vma_chain));
>                 return;
>         }
>
> +       anon_vma_lock_write(active_anon_vma);
> +
>         /*
>          * Unlink each anon_vma chained to the VMA.  This list is ordered
>          * from newest to oldest, ensuring the root anon_vma gets freed last.
> @@ -489,7 +468,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
>         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
>                 struct anon_vma *anon_vma = avc->anon_vma;
>
> -               root = lock_anon_vma_root(root, anon_vma);
>                 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
>
>                 /*
> @@ -505,13 +483,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
>                 anon_vma_chain_free(avc);
>         }
>
> -       vma->anon_vma->num_active_vmas--;
> +       active_anon_vma->num_active_vmas--;
>         /*
>          * vma would still be needed after unlink, and anon_vma will be prepared
>          * when handle fault.
>          */
>         vma->anon_vma = NULL;
> -       unlock_anon_vma_root(root);
> +       anon_vma_unlock_write(active_anon_vma);
> +
>
>         /*
>          * Iterate the list once more, it now only contains empty and unlinked
> --
> 2.52.0
>
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Lorenzo Stoakes 3 weeks, 4 days ago
On Wed, Jan 14, 2026 at 08:55:58AM -0800, Suren Baghdasaryan wrote:
> On Tue, Jan 6, 2026 at 7:04 AM Lorenzo Stoakes
> <lorenzo.stoakes@oracle.com> wrote:
> >
> > The root anon_vma of all anon_vma's linked to a VMA must by definition be
> > the same - a VMA and all of its descendants/ancestors must exist in the
> > same CoW chain.
> >
> > Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
> > anon_vma_clone()") introduced paranoid checking of the root anon_vma
> > remaining the same throughout all AVC's in 2011.
> >
> > I think 15 years later we can safely assume that this is always the case.
> >
> > Additionally, since unfaulted VMAs being cloned from or unlinked are
> > no-op's, we can simply lock the anon_vma's associated with this rather than
> > doing any specific dance around this.
> >
> > This removes unnecessary checks and makes it clear that the root anon_vma
> > is shared between all anon_vma's in a given VMA's anon_vma_chain.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
> >  mm/rmap.c | 51 +++++++++++++++------------------------------------
> >  1 file changed, 15 insertions(+), 36 deletions(-)
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index de2cbe860566..6ac42671bedd 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -232,32 +232,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> >         return -ENOMEM;
> >  }
> >
> > -/*
> > - * This is a useful helper function for locking the anon_vma root as
> > - * we traverse the vma->anon_vma_chain, looping over anon_vma's that
> > - * have the same vma.
> > - *
> > - * Such anon_vma's should have the same root, so you'd expect to see
> > - * just a single mutex_lock for the whole traversal.
> > - */
> > -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
> > -{
> > -       struct anon_vma *new_root = anon_vma->root;
> > -       if (new_root != root) {
> > -               if (WARN_ON_ONCE(root))
> > -                       up_write(&root->rwsem);
> > -               root = new_root;
> > -               down_write(&root->rwsem);
> > -       }
> > -       return root;
> > -}
> > -
> > -static inline void unlock_anon_vma_root(struct anon_vma *root)
> > -{
> > -       if (root)
> > -               up_write(&root->rwsem);
> > -}
> > -
> >  static void check_anon_vma_clone(struct vm_area_struct *dst,
> >                                  struct vm_area_struct *src)
> >  {
> > @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
> >  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> >  {
> >         struct anon_vma_chain *avc, *pavc;
> > -       struct anon_vma *root = NULL;
> >
> >         check_anon_vma_clone(dst, src);
> >
> >         if (!src->anon_vma)
> >                 return 0;
> >
> > +       check_anon_vma_clone(dst, src);
>
> check_anon_vma_clone() was already called 4 lines above. I'm guessing
> this was accidentally carried over from the previous version?
>

Yeah, I don't know why this is here - in the tree it isn't. Maybe Andrew
noticed and elided it? :)

But yeah the check above is correct so current in-tree state is correct.

Thanks, Lorenzo
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Andrew Morton 3 weeks, 4 days ago
On Wed, 14 Jan 2026 17:54:07 +0000 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:

> > >  {
> > > @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
> > >  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > >  {
> > >         struct anon_vma_chain *avc, *pavc;
> > > -       struct anon_vma *root = NULL;
> > >
> > >         check_anon_vma_clone(dst, src);
> > >
> > >         if (!src->anon_vma)
> > >                 return 0;
> > >
> > > +       check_anon_vma_clone(dst, src);
> >
> > check_anon_vma_clone() was already called 4 lines above. I'm guessing
> > this was accidentally carried over from the previous version?
> >
> 
> Yeah I don't know why this is here, in the tree it isn't, maybe Andrew noticed
> and elided? :)

Thanks, but I wouldn't have done that silently.

The later "mm/rmap: separate out fork-only logic on anon_vma_clone()" removed it:

: @@ -281,17 +317,17 @@ static void cleanup_partial_anon_vmas(st
:   *
:   * Returns: 0 on success, -ENOMEM on failure.
:   */
: -int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
: +int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
: +		   enum vma_operation operation)
:  {
:  	struct anon_vma_chain *avc, *pavc;
: +	struct anon_vma *active_anon_vma = src->anon_vma;
:  
: -	check_anon_vma_clone(dst, src);
: +	check_anon_vma_clone(dst, src, operation);
:  
: -	if (!src->anon_vma)
: +	if (!active_anon_vma)
:  		return 0;
:  
: -	check_anon_vma_clone(dst, src);
: -
:  	/*
:  	 * Allocate AVCs. We don't need an anon_vma lock for this as we
:  	 * are not updating the anon_vma rbtree nor are we changing
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Lorenzo Stoakes 3 weeks, 4 days ago
On Wed, Jan 14, 2026 at 02:12:04PM -0800, Andrew Morton wrote:
> On Wed, 14 Jan 2026 17:54:07 +0000 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:
>
> > > >  {
> > > > @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
> > > >  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > > >  {
> > > >         struct anon_vma_chain *avc, *pavc;
> > > > -       struct anon_vma *root = NULL;
> > > >
> > > >         check_anon_vma_clone(dst, src);
> > > >
> > > >         if (!src->anon_vma)
> > > >                 return 0;
> > > >
> > > > +       check_anon_vma_clone(dst, src);
> > >
> > > check_anon_vma_clone() was already called 4 lines above. I'm guessing
> > > this was accidentally carried over from the previous version?
> > >
> >
> > Yeah I don't know why this is here, in the tree it isn't, maybe Andrew noticed
> > and elided? :)
>
> Thanks, but I wouldn't have done that silently.

Yeah I didn't think you would!

>
> The later "mm/rmap: separate out fork-only logic on anon_vma_clone()" removed it:
>
> : @@ -281,17 +317,17 @@ static void cleanup_partial_anon_vmas(st
> :   *
> :   * Returns: 0 on success, -ENOMEM on failure.
> :   */
> : -int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> : +int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
> : +		   enum vma_operation operation)
> :  {
> :  	struct anon_vma_chain *avc, *pavc;
> : +	struct anon_vma *active_anon_vma = src->anon_vma;
> :
> : -	check_anon_vma_clone(dst, src);
> : +	check_anon_vma_clone(dst, src, operation);
> :
> : -	if (!src->anon_vma)
> : +	if (!active_anon_vma)
> :  		return 0;
> :
> : -	check_anon_vma_clone(dst, src);
> : -
> :  	/*
> :  	 * Allocate AVCs. We don't need an anon_vma lock for this as we
> :  	 * are not updating the anon_vma rbtree nor are we changing
>

Thanks, yeah, we figured it out. Luckily it's completely benign, so it's not
a bisection hazard and is fine to leave as is - but still, doh! :)

Cheers, Lorenzo
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Suren Baghdasaryan 3 weeks, 4 days ago
On Wed, Jan 14, 2026 at 9:54 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> On Wed, Jan 14, 2026 at 08:55:58AM -0800, Suren Baghdasaryan wrote:
> > On Tue, Jan 6, 2026 at 7:04 AM Lorenzo Stoakes
> > <lorenzo.stoakes@oracle.com> wrote:
> > >
> > > The root anon_vma of all anon_vma's linked to a VMA must by definition be
> > > the same - a VMA and all of its descendants/ancestors must exist in the
> > > same CoW chain.
> > >
> > > Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
> > > anon_vma_clone()") introduced paranoid checking of the root anon_vma
> > > remaining the same throughout all AVC's in 2011.
> > >
> > > I think 15 years later we can safely assume that this is always the case.
> > >
> > > Additionally, since unfaulted VMAs being cloned from or unlinked are
> > > no-op's, we can simply lock the anon_vma's associated with this rather than
> > > doing any specific dance around this.
> > >
> > > This removes unnecessary checks and makes it clear that the root anon_vma
> > > is shared between all anon_vma's in a given VMA's anon_vma_chain.
> > >
> > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > > ---
> > >  mm/rmap.c | 51 +++++++++++++++------------------------------------
> > >  1 file changed, 15 insertions(+), 36 deletions(-)
> > >
> > > diff --git a/mm/rmap.c b/mm/rmap.c
> > > index de2cbe860566..6ac42671bedd 100644
> > > --- a/mm/rmap.c
> > > +++ b/mm/rmap.c
> > > @@ -232,32 +232,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > >         return -ENOMEM;
> > >  }
> > >
> > > -/*
> > > - * This is a useful helper function for locking the anon_vma root as
> > > - * we traverse the vma->anon_vma_chain, looping over anon_vma's that
> > > - * have the same vma.
> > > - *
> > > - * Such anon_vma's should have the same root, so you'd expect to see
> > > - * just a single mutex_lock for the whole traversal.
> > > - */
> > > -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
> > > -{
> > > -       struct anon_vma *new_root = anon_vma->root;
> > > -       if (new_root != root) {
> > > -               if (WARN_ON_ONCE(root))
> > > -                       up_write(&root->rwsem);
> > > -               root = new_root;
> > > -               down_write(&root->rwsem);
> > > -       }
> > > -       return root;
> > > -}
> > > -
> > > -static inline void unlock_anon_vma_root(struct anon_vma *root)
> > > -{
> > > -       if (root)
> > > -               up_write(&root->rwsem);
> > > -}
> > > -
> > >  static void check_anon_vma_clone(struct vm_area_struct *dst,
> > >                                  struct vm_area_struct *src)
> > >  {
> > > @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
> > >  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > >  {
> > >         struct anon_vma_chain *avc, *pavc;
> > > -       struct anon_vma *root = NULL;
> > >
> > >         check_anon_vma_clone(dst, src);
> > >
> > >         if (!src->anon_vma)
> > >                 return 0;
> > >
> > > +       check_anon_vma_clone(dst, src);
> >
> > check_anon_vma_clone() was already called 4 lines above. I'm guessing
> > this was accidentally carried over from the previous version?
> >
>
> Yeah I don't know why this is here, in the tree it isn't, maybe Andrew noticed
> and elided? :)

I noticed when reviewing later patches that you remove this extra call
in one of them. That's why the current tree is fine.

>
> But yeah the check above is correct so current in-tree state is correct.
>
> Thanks, Lorenzo
Re: [PATCH v2 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap
Posted by Liam R. Howlett 1 month ago
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [260106 10:04]:
> The root anon_vma of all anon_vma's linked to a VMA must by definition be
> the same - a VMA and all of its descendants/ancestors must exist in the
> same CoW chain.
> 
> Commit bb4aa39676f7 ("mm: avoid repeated anon_vma lock/unlock sequences in
> anon_vma_clone()") introduced paranoid checking of the root anon_vma
> remaining the same throughout all AVC's in 2011.
> 
> I think 15 years later we can safely assume that this is always the case.
> 
> Additionally, since unfaulted VMAs being cloned from or unlinked are
> no-op's, we can simply lock the anon_vma's associated with this rather than
> doing any specific dance around this.
> 
> This removes unnecessary checks and makes it clear that the root anon_vma
> is shared between all anon_vma's in a given VMA's anon_vma_chain.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

One extra blank line (noted below), but LGTM.

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  mm/rmap.c | 51 +++++++++++++++------------------------------------
>  1 file changed, 15 insertions(+), 36 deletions(-)
> 
> diff --git a/mm/rmap.c b/mm/rmap.c
> index de2cbe860566..6ac42671bedd 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -232,32 +232,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
>  	return -ENOMEM;
>  }
>  
> -/*
> - * This is a useful helper function for locking the anon_vma root as
> - * we traverse the vma->anon_vma_chain, looping over anon_vma's that
> - * have the same vma.
> - *
> - * Such anon_vma's should have the same root, so you'd expect to see
> - * just a single mutex_lock for the whole traversal.
> - */
> -static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
> -{
> -	struct anon_vma *new_root = anon_vma->root;
> -	if (new_root != root) {
> -		if (WARN_ON_ONCE(root))
> -			up_write(&root->rwsem);
> -		root = new_root;
> -		down_write(&root->rwsem);
> -	}
> -	return root;
> -}
> -
> -static inline void unlock_anon_vma_root(struct anon_vma *root)
> -{
> -	if (root)
> -		up_write(&root->rwsem);
> -}
> -
>  static void check_anon_vma_clone(struct vm_area_struct *dst,
>  				 struct vm_area_struct *src)
>  {
> @@ -310,26 +284,28 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
>  int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
>  {
>  	struct anon_vma_chain *avc, *pavc;
> -	struct anon_vma *root = NULL;
>  
>  	check_anon_vma_clone(dst, src);
>  
>  	if (!src->anon_vma)
>  		return 0;
>  
> +	check_anon_vma_clone(dst, src);
> +
> +	/* All anon_vma's share the same root. */
> +	anon_vma_lock_write(src->anon_vma);
>  	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
>  		struct anon_vma *anon_vma;
>  
>  		avc = anon_vma_chain_alloc(GFP_NOWAIT);
>  		if (unlikely(!avc)) {
> -			unlock_anon_vma_root(root);
> -			root = NULL;
> +			anon_vma_unlock_write(src->anon_vma);
>  			avc = anon_vma_chain_alloc(GFP_KERNEL);
>  			if (!avc)
>  				goto enomem_failure;
> +			anon_vma_lock_write(src->anon_vma);
>  		}
>  		anon_vma = pavc->anon_vma;
> -		root = lock_anon_vma_root(root, anon_vma);
>  		anon_vma_chain_link(dst, avc, anon_vma);
>  
>  		/*
> @@ -346,7 +322,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
>  	}
>  	if (dst->anon_vma)
>  		dst->anon_vma->num_active_vmas++;
> -	unlock_anon_vma_root(root);
> +
> +	anon_vma_unlock_write(src->anon_vma);
>  	return 0;
>  
>   enomem_failure:
> @@ -471,17 +448,19 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
>  void unlink_anon_vmas(struct vm_area_struct *vma)
>  {
>  	struct anon_vma_chain *avc, *next;
> -	struct anon_vma *root = NULL;
> +	struct anon_vma *active_anon_vma = vma->anon_vma;
>  
>  	/* Always hold mmap lock, read-lock on unmap possibly. */
>  	mmap_assert_locked(vma->vm_mm);
>  
>  	/* Unfaulted is a no-op. */
> -	if (!vma->anon_vma) {
> +	if (!active_anon_vma) {
>  		VM_WARN_ON_ONCE(!list_empty(&vma->anon_vma_chain));
>  		return;
>  	}
>  
> +	anon_vma_lock_write(active_anon_vma);
> +
>  	/*
>  	 * Unlink each anon_vma chained to the VMA.  This list is ordered
>  	 * from newest to oldest, ensuring the root anon_vma gets freed last.
> @@ -489,7 +468,6 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
>  	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
>  		struct anon_vma *anon_vma = avc->anon_vma;
>  
> -		root = lock_anon_vma_root(root, anon_vma);
>  		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
>  
>  		/*
> @@ -505,13 +483,14 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
>  		anon_vma_chain_free(avc);
>  	}
>  
> -	vma->anon_vma->num_active_vmas--;
> +	active_anon_vma->num_active_vmas--;
>  	/*
>  	 * vma would still be needed after unlink, and anon_vma will be prepared
>  	 * when handle fault.
>  	 */
>  	vma->anon_vma = NULL;
> -	unlock_anon_vma_root(root);
> +	anon_vma_unlock_write(active_anon_vma);
> +

nit: extra blank line here.

>  
>  	/*
>  	 * Iterate the list once more, it now only contains empty and unlinked
> -- 
> 2.52.0
>