There is no reason to allocate the anon_vma_chain under the anon_vma write
lock when cloning - we can in fact assign these to the destination VMA
safely, as we hold the exclusive mmap lock and therefore preclude anybody
else from accessing these fields.
We only need to take the anon_vma write lock when we link rbtree edges from
the anon_vma to the newly established AVCs.
This also allows us to eliminate the weird GFP_NOWAIT, GFP_KERNEL dance
introduced in commit dd34739c03f2 ("mm: avoid anon_vma_chain allocation
under anon_vma lock"), further simplifying this logic.
This should reduce anon_vma lock contention and clarify exactly where the
anon_vma lock is required.
We cannot adjust __anon_vma_prepare() in the same way, as it is only
protected by the VMA read lock, so we have to perform the allocation there
under the anon_vma write lock and page_table_lock (to protect against
racing threads), and we wish to retain the lock ordering.
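In outline, the resulting anon_vma_clone() flow looks roughly like this
(simplified sketch; error handling and the anon_vma reuse logic in the
linking loop are elided):

	/*
	 * Phase 1: allocate and assign AVCs. No anon_vma lock is needed here,
	 * as the exclusive mmap lock keeps the unlinked AVCs private to us.
	 */
	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
		avc = anon_vma_chain_alloc(GFP_KERNEL);
		if (!avc)
			goto enomem_failure;
		anon_vma_chain_assign(dst, avc, pavc->anon_vma);
	}

	/*
	 * Phase 2: link rbtree edges from each anon_vma to its new AVC. This
	 * is the only part that requires the anon_vma write lock.
	 */
	anon_vma_lock_write(src->anon_vma);
	list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
	anon_vma_unlock_write(src->anon_vma);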
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
mm/rmap.c | 49 +++++++++++++++++++++++++++++--------------------
1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 60134a566073..de9de6d71c23 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -146,14 +146,13 @@ static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}
-static void anon_vma_chain_link(struct vm_area_struct *vma,
- struct anon_vma_chain *avc,
- struct anon_vma *anon_vma)
+static void anon_vma_chain_assign(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
{
avc->vma = vma;
avc->anon_vma = anon_vma;
list_add(&avc->same_vma, &vma->anon_vma_chain);
- anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
/**
@@ -210,7 +209,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
- anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma_chain_assign(vma, avc, anon_vma);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
@@ -287,20 +287,28 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
check_anon_vma_clone(dst, src);
+ /*
+ * Allocate AVCs. We don't need an anon_vma lock for this as we
+ * are not updating the anon_vma rbtree nor are we changing
+ * anon_vma statistics.
+ *
+ * We hold the mmap write lock so there's no possibility of
+ * the unlinked AVCs being observed yet.
+ */
+ list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+ avc = anon_vma_chain_alloc(GFP_KERNEL);
+ if (!avc)
+ goto enomem_failure;
+
+ anon_vma_chain_assign(dst, avc, pavc->anon_vma);
+ }
+
+ /* Now link the anon_vmas back to the newly inserted AVCs. */
anon_vma_lock_write(src->anon_vma);
- list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
- struct anon_vma *anon_vma;
-
- avc = anon_vma_chain_alloc(GFP_NOWAIT);
- if (unlikely(!avc)) {
- anon_vma_unlock_write(src->anon_vma);
- avc = anon_vma_chain_alloc(GFP_KERNEL);
- if (!avc)
- goto enomem_failure;
- anon_vma_lock_write(src->anon_vma);
- }
- anon_vma = pavc->anon_vma;
- anon_vma_chain_link(dst, avc, anon_vma);
+ list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma) {
+ struct anon_vma *anon_vma = avc->anon_vma;
+
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
/*
* Reuse existing anon_vma if it has no vma and only one
@@ -316,7 +324,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
}
if (dst->anon_vma)
dst->anon_vma->num_active_vmas++;
-
anon_vma_unlock_write(src->anon_vma);
return 0;
@@ -385,8 +392,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
+ anon_vma_chain_assign(vma, avc, anon_vma);
+ /* Now let rmap see it. */
anon_vma_lock_write(anon_vma);
- anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
--
2.52.0
On Wed, Dec 17, 2025 at 4:27 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> There is no reason to allocate the anon_vma_chain under the anon_vma write
> lock when cloning - we can in fact assign these to the destination VMA
> safely as we hold the exclusive mmap lock and therefore preclude anybody
> else accessing these fields.
>
> We only need take the anon_vma write lock when we link rbtree edges from
> the anon_vma to the newly established AVCs.
>
> This also allows us to eliminate the weird GFP_NOWAIT, GFP_KERNEL dance
> introduced in commit dd34739c03f2 ("mm: avoid anon_vma_chain allocation
> under anon_vma lock"), further simplifying this logic.
>
> This should reduce lock anon_vma contention, and clarifies exactly where
> the anon_vma lock is required.
>
> We cannot adjust __anon_vma_prepare() in the same way as this is only
> protected by VMA read lock, so we have to perform the allocation here under
> the anon_vma write lock and page_table_lock (to protect against racing
> threads), and we wish to retain the lock ordering.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
One nit but otherwise nice cleanup.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
> ---
> mm/rmap.c | 49 +++++++++++++++++++++++++++++--------------------
> 1 file changed, 29 insertions(+), 20 deletions(-)
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 60134a566073..de9de6d71c23 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -146,14 +146,13 @@ static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
> kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
> }
>
> -static void anon_vma_chain_link(struct vm_area_struct *vma,
> - struct anon_vma_chain *avc,
> - struct anon_vma *anon_vma)
> +static void anon_vma_chain_assign(struct vm_area_struct *vma,
> + struct anon_vma_chain *avc,
> + struct anon_vma *anon_vma)
> {
> avc->vma = vma;
> avc->anon_vma = anon_vma;
> list_add(&avc->same_vma, &vma->anon_vma_chain);
> - anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> }
>
> /**
> @@ -210,7 +209,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> spin_lock(&mm->page_table_lock);
> if (likely(!vma->anon_vma)) {
> vma->anon_vma = anon_vma;
> - anon_vma_chain_link(vma, avc, anon_vma);
> + anon_vma_chain_assign(vma, avc, anon_vma);
> + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> anon_vma->num_active_vmas++;
> allocated = NULL;
> avc = NULL;
> @@ -287,20 +287,28 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
>
> check_anon_vma_clone(dst, src);
>
> + /*
> + * Allocate AVCs. We don't need an anon_vma lock for this as we
> + * are not updating the anon_vma rbtree nor are we changing
> + * anon_vma statistics.
> + *
> + * We hold the mmap write lock so there's no possibliity of
To be more specific, we are holding src's mmap write lock. I think
clarifying that will avoid any confusion.
> + * the unlinked AVC's being observed yet.
> + */
> + list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
> + avc = anon_vma_chain_alloc(GFP_KERNEL);
> + if (!avc)
> + goto enomem_failure;
> +
> + anon_vma_chain_assign(dst, avc, pavc->anon_vma);
> + }
> +
> + /* Now link the anon_vma's back to the newly inserted AVCs. */
> anon_vma_lock_write(src->anon_vma);
> - list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
> - struct anon_vma *anon_vma;
> -
> - avc = anon_vma_chain_alloc(GFP_NOWAIT);
> - if (unlikely(!avc)) {
> - anon_vma_unlock_write(src->anon_vma);
> - avc = anon_vma_chain_alloc(GFP_KERNEL);
> - if (!avc)
> - goto enomem_failure;
> - anon_vma_lock_write(src->anon_vma);
> - }
> - anon_vma = pavc->anon_vma;
> - anon_vma_chain_link(dst, avc, anon_vma);
> + list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma) {
> + struct anon_vma *anon_vma = avc->anon_vma;
> +
> + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
>
> /*
> * Reuse existing anon_vma if it has no vma and only one
> @@ -316,7 +324,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> }
> if (dst->anon_vma)
> dst->anon_vma->num_active_vmas++;
> -
> anon_vma_unlock_write(src->anon_vma);
> return 0;
>
> @@ -385,8 +392,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> get_anon_vma(anon_vma->root);
> /* Mark this anon_vma as the one where our new (COWed) pages go. */
> vma->anon_vma = anon_vma;
> + anon_vma_chain_assign(vma, avc, anon_vma);
> + /* Now let rmap see it. */
> anon_vma_lock_write(anon_vma);
> - anon_vma_chain_link(vma, avc, anon_vma);
> + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> anon_vma->parent->num_children++;
> anon_vma_unlock_write(anon_vma);
>
> --
> 2.52.0
>
On Tue, Dec 30, 2025 at 01:35:41PM -0800, Suren Baghdasaryan wrote:
> On Wed, Dec 17, 2025 at 4:27 AM Lorenzo Stoakes
> <lorenzo.stoakes@oracle.com> wrote:
> >
> > There is no reason to allocate the anon_vma_chain under the anon_vma write
> > lock when cloning - we can in fact assign these to the destination VMA
> > safely as we hold the exclusive mmap lock and therefore preclude anybody
> > else accessing these fields.
> >
> > We only need take the anon_vma write lock when we link rbtree edges from
> > the anon_vma to the newly established AVCs.
> >
> > This also allows us to eliminate the weird GFP_NOWAIT, GFP_KERNEL dance
> > introduced in commit dd34739c03f2 ("mm: avoid anon_vma_chain allocation
> > under anon_vma lock"), further simplifying this logic.
> >
> > This should reduce lock anon_vma contention, and clarifies exactly where
> > the anon_vma lock is required.
> >
> > We cannot adjust __anon_vma_prepare() in the same way as this is only
> > protected by VMA read lock, so we have to perform the allocation here under
> > the anon_vma write lock and page_table_lock (to protect against racing
> > threads), and we wish to retain the lock ordering.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> One nit but otherwise nice cleanup.
>
> Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Thanks!
One nice thing with the separate cleanup_partial_anon_vmas() function
introduced as part of this review (thanks for the good spot!) is that we can
now simplify this _even further_: since we don't even insert anything into
the interval tree at the point of allocation, freeing is just a case of
freeing up AVCs.
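Roughly speaking (illustrative sketch only - the name and body here are
assumed, and the actual cleanup_partial_anon_vmas() helper in the series may
differ), the unwind can simply be:

	static void cleanup_partial_anon_vmas(struct vm_area_struct *dst)
	{
		struct anon_vma_chain *avc, *next;

		/*
		 * None of the partially-constructed AVCs were inserted into an
		 * anon_vma interval tree, so no anon_vma lock and no
		 * anon_vma_interval_tree_remove() calls are needed - just
		 * unlink each AVC from dst's same_vma list and free it.
		 */
		list_for_each_entry_safe(avc, next, &dst->anon_vma_chain, same_vma) {
			list_del(&avc->same_vma);
			anon_vma_chain_free(avc);
		}
	}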
>
> > ---
> > mm/rmap.c | 49 +++++++++++++++++++++++++++++--------------------
> > 1 file changed, 29 insertions(+), 20 deletions(-)
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 60134a566073..de9de6d71c23 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -146,14 +146,13 @@ static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
> > kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
> > }
> >
> > -static void anon_vma_chain_link(struct vm_area_struct *vma,
> > - struct anon_vma_chain *avc,
> > - struct anon_vma *anon_vma)
> > +static void anon_vma_chain_assign(struct vm_area_struct *vma,
> > + struct anon_vma_chain *avc,
> > + struct anon_vma *anon_vma)
> > {
> > avc->vma = vma;
> > avc->anon_vma = anon_vma;
> > list_add(&avc->same_vma, &vma->anon_vma_chain);
> > - anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > }
> >
> > /**
> > @@ -210,7 +209,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > spin_lock(&mm->page_table_lock);
> > if (likely(!vma->anon_vma)) {
> > vma->anon_vma = anon_vma;
> > - anon_vma_chain_link(vma, avc, anon_vma);
> > + anon_vma_chain_assign(vma, avc, anon_vma);
> > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > anon_vma->num_active_vmas++;
> > allocated = NULL;
> > avc = NULL;
> > @@ -287,20 +287,28 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> >
> > check_anon_vma_clone(dst, src);
> >
> > + /*
> > + * Allocate AVCs. We don't need an anon_vma lock for this as we
> > + * are not updating the anon_vma rbtree nor are we changing
> > + * anon_vma statistics.
> > + *
> > + * We hold the mmap write lock so there's no possibliity of
>
> To be more specific, we are holding src's mmap write lock. I think
> clarifying that will avoid any confusion.
Well, it's the same mm for both, right? :) And actually the observations
would be made around dst, no? As that's where the unlinked AVCs are being
established.
I think clearer would be 'We hold the exclusive mmap write lock', just to
highlight that it excludes anybody else from accessing these fields in the
VMA.
>
> > + * the unlinked AVC's being observed yet.
> > + */
> > + list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
> > + avc = anon_vma_chain_alloc(GFP_KERNEL);
> > + if (!avc)
> > + goto enomem_failure;
> > +
> > + anon_vma_chain_assign(dst, avc, pavc->anon_vma);
> > + }
> > +
> > + /* Now link the anon_vma's back to the newly inserted AVCs. */
> > anon_vma_lock_write(src->anon_vma);
> > - list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
> > - struct anon_vma *anon_vma;
> > -
> > - avc = anon_vma_chain_alloc(GFP_NOWAIT);
> > - if (unlikely(!avc)) {
> > - anon_vma_unlock_write(src->anon_vma);
> > - avc = anon_vma_chain_alloc(GFP_KERNEL);
> > - if (!avc)
> > - goto enomem_failure;
> > - anon_vma_lock_write(src->anon_vma);
> > - }
> > - anon_vma = pavc->anon_vma;
> > - anon_vma_chain_link(dst, avc, anon_vma);
> > + list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma) {
> > + struct anon_vma *anon_vma = avc->anon_vma;
> > +
> > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> >
> > /*
> > * Reuse existing anon_vma if it has no vma and only one
> > @@ -316,7 +324,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > }
> > if (dst->anon_vma)
> > dst->anon_vma->num_active_vmas++;
> > -
> > anon_vma_unlock_write(src->anon_vma);
> > return 0;
> >
> > @@ -385,8 +392,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> > get_anon_vma(anon_vma->root);
> > /* Mark this anon_vma as the one where our new (COWed) pages go. */
> > vma->anon_vma = anon_vma;
> > + anon_vma_chain_assign(vma, avc, anon_vma);
> > + /* Now let rmap see it. */
> > anon_vma_lock_write(anon_vma);
> > - anon_vma_chain_link(vma, avc, anon_vma);
> > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > anon_vma->parent->num_children++;
> > anon_vma_unlock_write(anon_vma);
> >
> > --
> > 2.52.0
> >
On Tue, Jan 6, 2026 at 6:17 AM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
>
> On Tue, Dec 30, 2025 at 01:35:41PM -0800, Suren Baghdasaryan wrote:
> > On Wed, Dec 17, 2025 at 4:27 AM Lorenzo Stoakes
> > <lorenzo.stoakes@oracle.com> wrote:
> > >
> > > There is no reason to allocate the anon_vma_chain under the anon_vma write
> > > lock when cloning - we can in fact assign these to the destination VMA
> > > safely as we hold the exclusive mmap lock and therefore preclude anybody
> > > else accessing these fields.
> > >
> > > We only need take the anon_vma write lock when we link rbtree edges from
> > > the anon_vma to the newly established AVCs.
> > >
> > > This also allows us to eliminate the weird GFP_NOWAIT, GFP_KERNEL dance
> > > introduced in commit dd34739c03f2 ("mm: avoid anon_vma_chain allocation
> > > under anon_vma lock"), further simplifying this logic.
> > >
> > > This should reduce lock anon_vma contention, and clarifies exactly where
> > > the anon_vma lock is required.
> > >
> > > We cannot adjust __anon_vma_prepare() in the same way as this is only
> > > protected by VMA read lock, so we have to perform the allocation here under
> > > the anon_vma write lock and page_table_lock (to protect against racing
> > > threads), and we wish to retain the lock ordering.
> > >
> > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> >
> > One nit but otherwise nice cleanup.
> >
> > Reviewed-by: Suren Baghdasaryan <surenb@google.com>
>
> Thanks!
>
> One nice thing with the separate cleanup_partial_anon_vmas()'s function
> introduced as part of this review (thanks for the good spot!) is we can now
> simplify this _even further_ since we don't even insert anything into the
> interval tree at the point of allocation, and so freeing is just a case of
> freeing up AVC's.
>
> >
> > > ---
> > > mm/rmap.c | 49 +++++++++++++++++++++++++++++--------------------
> > > 1 file changed, 29 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/mm/rmap.c b/mm/rmap.c
> > > index 60134a566073..de9de6d71c23 100644
> > > --- a/mm/rmap.c
> > > +++ b/mm/rmap.c
> > > @@ -146,14 +146,13 @@ static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
> > > kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
> > > }
> > >
> > > -static void anon_vma_chain_link(struct vm_area_struct *vma,
> > > - struct anon_vma_chain *avc,
> > > - struct anon_vma *anon_vma)
> > > +static void anon_vma_chain_assign(struct vm_area_struct *vma,
> > > + struct anon_vma_chain *avc,
> > > + struct anon_vma *anon_vma)
> > > {
> > > avc->vma = vma;
> > > avc->anon_vma = anon_vma;
> > > list_add(&avc->same_vma, &vma->anon_vma_chain);
> > > - anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > > }
> > >
> > > /**
> > > @@ -210,7 +209,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > > spin_lock(&mm->page_table_lock);
> > > if (likely(!vma->anon_vma)) {
> > > vma->anon_vma = anon_vma;
> > > - anon_vma_chain_link(vma, avc, anon_vma);
> > > + anon_vma_chain_assign(vma, avc, anon_vma);
> > > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > > anon_vma->num_active_vmas++;
> > > allocated = NULL;
> > > avc = NULL;
> > > @@ -287,20 +287,28 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > >
> > > check_anon_vma_clone(dst, src);
> > >
> > > + /*
> > > + * Allocate AVCs. We don't need an anon_vma lock for this as we
> > > + * are not updating the anon_vma rbtree nor are we changing
> > > + * anon_vma statistics.
> > > + *
> > > + * We hold the mmap write lock so there's no possibliity of
> >
> > To be more specific, we are holding src's mmap write lock. I think
> > clarifying that will avoid any confusion.
>
> Well, it's the same mm for both right? :)
Hmm. I think in dup_mmap()->anon_vma_fork()->anon_vma_clone() call
chain the dst->vm_mm and src->vm_mm are different, no? After
assignment at https://elixir.bootlin.com/linux/v6.19-rc4/source/mm/mmap.c#L1779
src->vm_mm is pointing to oldmm while dst->vm_mm is pointing to mm. Am
I reading this wrong?
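Roughly (heavily simplified from memory of the dup_mmap() loop, details vary
by version), the fork path does:

	/* dup_mmap(): the duplicated VMA belongs to the new (child) mm. */
	for_each_vma(vmi, mpnt) {
		tmp = vm_area_dup(mpnt);	/* tmp is the child's copy */
		...
		tmp->vm_mm = mm;		/* dst->vm_mm == new mm */
		...
		anon_vma_fork(tmp, mpnt);	/* src == mpnt, src->vm_mm == oldmm */
		...
	}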
> and actually the observations
> would be made around dst no? As that's where the unlinked AVC's are being
> established.
>
> I think more clear is 'We hold the exclusive mmap write lock' just to
> highlight that it excludes anybody else from accessing these fields in the
> VMA.
>
> >
> > > + * the unlinked AVC's being observed yet.
> > > + */
> > > + list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
> > > + avc = anon_vma_chain_alloc(GFP_KERNEL);
> > > + if (!avc)
> > > + goto enomem_failure;
> > > +
> > > + anon_vma_chain_assign(dst, avc, pavc->anon_vma);
> > > + }
> > > +
> > > + /* Now link the anon_vma's back to the newly inserted AVCs. */
> > > anon_vma_lock_write(src->anon_vma);
> > > - list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
> > > - struct anon_vma *anon_vma;
> > > -
> > > - avc = anon_vma_chain_alloc(GFP_NOWAIT);
> > > - if (unlikely(!avc)) {
> > > - anon_vma_unlock_write(src->anon_vma);
> > > - avc = anon_vma_chain_alloc(GFP_KERNEL);
> > > - if (!avc)
> > > - goto enomem_failure;
> > > - anon_vma_lock_write(src->anon_vma);
> > > - }
> > > - anon_vma = pavc->anon_vma;
> > > - anon_vma_chain_link(dst, avc, anon_vma);
> > > + list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma) {
> > > + struct anon_vma *anon_vma = avc->anon_vma;
> > > +
> > > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > >
> > > /*
> > > * Reuse existing anon_vma if it has no vma and only one
> > > @@ -316,7 +324,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > > }
> > > if (dst->anon_vma)
> > > dst->anon_vma->num_active_vmas++;
> > > -
> > > anon_vma_unlock_write(src->anon_vma);
> > > return 0;
> > >
> > > @@ -385,8 +392,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> > > get_anon_vma(anon_vma->root);
> > > /* Mark this anon_vma as the one where our new (COWed) pages go. */
> > > vma->anon_vma = anon_vma;
> > > + anon_vma_chain_assign(vma, avc, anon_vma);
> > > + /* Now let rmap see it. */
> > > anon_vma_lock_write(anon_vma);
> > > - anon_vma_chain_link(vma, avc, anon_vma);
> > > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > > anon_vma->parent->num_children++;
> > > anon_vma_unlock_write(anon_vma);
> > >
> > > --
> > > 2.52.0
> > >
On Tue, Jan 06, 2026 at 01:20:29PM -0800, Suren Baghdasaryan wrote:
> > > > /**
> > > > @@ -210,7 +209,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> > > > spin_lock(&mm->page_table_lock);
> > > > if (likely(!vma->anon_vma)) {
> > > > vma->anon_vma = anon_vma;
> > > > - anon_vma_chain_link(vma, avc, anon_vma);
> > > > + anon_vma_chain_assign(vma, avc, anon_vma);
> > > > + anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
> > > > anon_vma->num_active_vmas++;
> > > > allocated = NULL;
> > > > avc = NULL;
> > > > @@ -287,20 +287,28 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
> > > >
> > > > check_anon_vma_clone(dst, src);
> > > >
> > > > + /*
> > > > + * Allocate AVCs. We don't need an anon_vma lock for this as we
> > > > + * are not updating the anon_vma rbtree nor are we changing
> > > > + * anon_vma statistics.
> > > > + *
> > > > + * We hold the mmap write lock so there's no possibliity of
> > >
> > > To be more specific, we are holding src's mmap write lock. I think
> > > clarifying that will avoid any confusion.
> >
> > Well, it's the same mm for both right? :)
>
> Hmm. I think in dup_mmap()->anon_vma_fork()->anon_vma_clone() call
> chain the dst->vm_mm and src->vm_mm are different, no? After
> assignment at https://elixir.bootlin.com/linux/v6.19-rc4/source/mm/mmap.c#L1779
> src->vm_mm is pointing to oldmm while dst->vm_mm is pointing to mm. Am
> I reading this wrong?
Yup, that's right, sorry - and I even make that clear elsewhere. I'll send a
fix-patch or something in the v2.
Cheers, Lorenzo