[PATCH] mm: thp: don't have to lock page anymore when splitting PMD

Yang Shi posted 1 patch 4 years, 3 months ago
mm/huge_memory.c | 44 +++++---------------------------------------
1 file changed, 5 insertions(+), 39 deletions(-)
[PATCH] mm: thp: don't have to lock page anymore when splitting PMD
Posted by Yang Shi 4 years, 3 months ago
The commit c444eb564fb1 ("mm: thp: make the THP mapcount atomic against
__split_huge_pmd_locked()") locked the page for PMD split to make
mapcount stable for reuse_swap_page(), then commit 1c2f67308af4 ("mm:
thp: fix MADV_REMOVE deadlock on shmem THP") reduce the scope to
anonymous page only.

However COW has not used mapcount to determine if the page is shared or
not anymore due to the COW fixes [1] from David Hildenbrand and the
reuse_swap_page() was removed as well.  So PMD split doesn't have to
lock the page anymore.  This patch basically reverted the above two
commits.

[1] https://lore.kernel.org/linux-mm/20220131162940.210846-1-david@redhat.com/

Cc: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Yang Shi <shy828301@gmail.com>
---
 mm/huge_memory.c | 44 +++++---------------------------------------
 1 file changed, 5 insertions(+), 39 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b49e1a11df2e..daaa698bd273 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2134,8 +2134,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	spinlock_t *ptl;
 	struct mmu_notifier_range range;
-	bool do_unlock_folio = false;
-	pmd_t _pmd;
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
 				address & HPAGE_PMD_MASK,
@@ -2148,48 +2146,16 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	 * pmd against. Otherwise we can end up replacing wrong folio.
 	 */
 	VM_BUG_ON(freeze && !folio);
-	if (folio) {
-		VM_WARN_ON_ONCE(!folio_test_locked(folio));
-		if (folio != page_folio(pmd_page(*pmd)))
-			goto out;
-	}
+	if (folio && folio != page_folio(pmd_page(*pmd)))
+		goto out;
 
-repeat:
-	if (pmd_trans_huge(*pmd)) {
-		if (!folio) {
-			folio = page_folio(pmd_page(*pmd));
-			/*
-			 * An anonymous page must be locked, to ensure that a
-			 * concurrent reuse_swap_page() sees stable mapcount;
-			 * but reuse_swap_page() is not used on shmem or file,
-			 * and page lock must not be taken when zap_pmd_range()
-			 * calls __split_huge_pmd() while i_mmap_lock is held.
-			 */
-			if (folio_test_anon(folio)) {
-				if (unlikely(!folio_trylock(folio))) {
-					folio_get(folio);
-					_pmd = *pmd;
-					spin_unlock(ptl);
-					folio_lock(folio);
-					spin_lock(ptl);
-					if (unlikely(!pmd_same(*pmd, _pmd))) {
-						folio_unlock(folio);
-						folio_put(folio);
-						folio = NULL;
-						goto repeat;
-					}
-					folio_put(folio);
-				}
-				do_unlock_folio = true;
-			}
-		}
-	} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
+	if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
 		goto out;
+
 	__split_huge_pmd_locked(vma, pmd, range.start, freeze);
 out:
 	spin_unlock(ptl);
-	if (do_unlock_folio)
-		folio_unlock(folio);
+
 	/*
 	 * No need to double call mmu_notifier->invalidate_range() callback.
 	 * They are 3 cases to consider inside __split_huge_pmd_locked():
-- 
2.26.3
Re: [PATCH] mm: thp: don't have to lock page anymore when splitting PMD
Posted by Miaohe Lin 4 years, 3 months ago
On 2022/3/4 6:20, Yang Shi wrote:
> The commit c444eb564fb1 ("mm: thp: make the THP mapcount atomic against
> __split_huge_pmd_locked()") locked the page for PMD split to make
> mapcount stable for reuse_swap_page(), then commit 1c2f67308af4 ("mm:
> thp: fix MADV_REMOVE deadlock on shmem THP") reduce the scope to
> anonymous page only.
> 
> However COW has not used mapcount to determine if the page is shared or
> not anymore due to the COW fixes [1] from David Hildenbrand and the
> reuse_swap_page() was removed as well.  So PMD split doesn't have to
> lock the page anymore.  This patch basically reverted the above two
> commits.
> 

Sounds reasonable. Since mapcount is not used to determine if we need to COW
the page, PMD split doesn't have to lock the page anymore.

> [1] https://lore.kernel.org/linux-mm/20220131162940.210846-1-david@redhat.com/
> 
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
> Signed-off-by: Yang Shi <shy828301@gmail.com>
> ---
>  mm/huge_memory.c | 44 +++++---------------------------------------
>  1 file changed, 5 insertions(+), 39 deletions(-)
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index b49e1a11df2e..daaa698bd273 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2134,8 +2134,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>  {
>  	spinlock_t *ptl;
>  	struct mmu_notifier_range range;
> -	bool do_unlock_folio = false;
> -	pmd_t _pmd;
>  
>  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
>  				address & HPAGE_PMD_MASK,
> @@ -2148,48 +2146,16 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>  	 * pmd against. Otherwise we can end up replacing wrong folio.
>  	 */
>  	VM_BUG_ON(freeze && !folio);
> -	if (folio) {
> -		VM_WARN_ON_ONCE(!folio_test_locked(folio));
> -		if (folio != page_folio(pmd_page(*pmd)))
> -			goto out;
> -	}
> +	if (folio && folio != page_folio(pmd_page(*pmd)))
> +		goto out;
>  
> -repeat:
> -	if (pmd_trans_huge(*pmd)) {
> -		if (!folio) {
> -			folio = page_folio(pmd_page(*pmd));
> -			/*
> -			 * An anonymous page must be locked, to ensure that a
> -			 * concurrent reuse_swap_page() sees stable mapcount;
> -			 * but reuse_swap_page() is not used on shmem or file,
> -			 * and page lock must not be taken when zap_pmd_range()
> -			 * calls __split_huge_pmd() while i_mmap_lock is held.
> -			 */
> -			if (folio_test_anon(folio)) {
> -				if (unlikely(!folio_trylock(folio))) {
> -					folio_get(folio);
> -					_pmd = *pmd;
> -					spin_unlock(ptl);
> -					folio_lock(folio);
> -					spin_lock(ptl);
> -					if (unlikely(!pmd_same(*pmd, _pmd))) {
> -						folio_unlock(folio);
> -						folio_put(folio);
> -						folio = NULL;
> -						goto repeat;
> -					}
> -					folio_put(folio);
> -				}
> -				do_unlock_folio = true;
> -			}
> -		}
> -	} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
> +	if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
>  		goto out;
> +
>  	__split_huge_pmd_locked(vma, pmd, range.start, freeze);

IUUC, here should be something like below:
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))
        __split_huge_pmd_locked(vma, pmd, range.start, freeze);

i.e. the pmd_trans_huge case is missing in the above change. Or am I miss something ?

Thanks for the patch. This really simplify the code and avoid the unneeded overhead.

>  out:
>  	spin_unlock(ptl);
> -	if (do_unlock_folio)
> -		folio_unlock(folio);
> +
>  	/*
>  	 * No need to double call mmu_notifier->invalidate_range() callback.
>  	 * They are 3 cases to consider inside __split_huge_pmd_locked():
>
Re: [PATCH] mm: thp: don't have to lock page anymore when splitting PMD
Posted by Yang Shi 4 years, 3 months ago
On Thu, Mar 3, 2022 at 6:25 PM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2022/3/4 6:20, Yang Shi wrote:
> > The commit c444eb564fb1 ("mm: thp: make the THP mapcount atomic against
> > __split_huge_pmd_locked()") locked the page for PMD split to make
> > mapcount stable for reuse_swap_page(), then commit 1c2f67308af4 ("mm:
> > thp: fix MADV_REMOVE deadlock on shmem THP") reduce the scope to
> > anonymous page only.
> >
> > However COW has not used mapcount to determine if the page is shared or
> > not anymore due to the COW fixes [1] from David Hildenbrand and the
> > reuse_swap_page() was removed as well.  So PMD split doesn't have to
> > lock the page anymore.  This patch basically reverted the above two
> > commits.
> >
>
> Sounds reasonable. Since mapcount is not used to determine if we need to COW
> the page, PMD split doesn't have to lock the page anymore.
>
> > [1] https://lore.kernel.org/linux-mm/20220131162940.210846-1-david@redhat.com/
> >
> > Cc: David Hildenbrand <david@redhat.com>
> > Cc: Andrea Arcangeli <aarcange@redhat.com>
> > Cc: Hugh Dickins <hughd@google.com>
> > Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
> > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > ---
> >  mm/huge_memory.c | 44 +++++---------------------------------------
> >  1 file changed, 5 insertions(+), 39 deletions(-)
> >
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index b49e1a11df2e..daaa698bd273 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -2134,8 +2134,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> >  {
> >       spinlock_t *ptl;
> >       struct mmu_notifier_range range;
> > -     bool do_unlock_folio = false;
> > -     pmd_t _pmd;
> >
> >       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
> >                               address & HPAGE_PMD_MASK,
> > @@ -2148,48 +2146,16 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> >        * pmd against. Otherwise we can end up replacing wrong folio.
> >        */
> >       VM_BUG_ON(freeze && !folio);
> > -     if (folio) {
> > -             VM_WARN_ON_ONCE(!folio_test_locked(folio));
> > -             if (folio != page_folio(pmd_page(*pmd)))
> > -                     goto out;
> > -     }
> > +     if (folio && folio != page_folio(pmd_page(*pmd)))
> > +             goto out;
> >
> > -repeat:
> > -     if (pmd_trans_huge(*pmd)) {
> > -             if (!folio) {
> > -                     folio = page_folio(pmd_page(*pmd));
> > -                     /*
> > -                      * An anonymous page must be locked, to ensure that a
> > -                      * concurrent reuse_swap_page() sees stable mapcount;
> > -                      * but reuse_swap_page() is not used on shmem or file,
> > -                      * and page lock must not be taken when zap_pmd_range()
> > -                      * calls __split_huge_pmd() while i_mmap_lock is held.
> > -                      */
> > -                     if (folio_test_anon(folio)) {
> > -                             if (unlikely(!folio_trylock(folio))) {
> > -                                     folio_get(folio);
> > -                                     _pmd = *pmd;
> > -                                     spin_unlock(ptl);
> > -                                     folio_lock(folio);
> > -                                     spin_lock(ptl);
> > -                                     if (unlikely(!pmd_same(*pmd, _pmd))) {
> > -                                             folio_unlock(folio);
> > -                                             folio_put(folio);
> > -                                             folio = NULL;
> > -                                             goto repeat;
> > -                                     }
> > -                                     folio_put(folio);
> > -                             }
> > -                             do_unlock_folio = true;
> > -                     }
> > -             }
> > -     } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
> > +     if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
> >               goto out;
> > +
> >       __split_huge_pmd_locked(vma, pmd, range.start, freeze);
>
> IUUC, here should be something like below:
> if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))
>         __split_huge_pmd_locked(vma, pmd, range.start, freeze);
>
> i.e. the pmd_trans_huge case is missing in the above change. Or am I miss something ?

Yes, you are definitely right. Must have pmd_trans_huge(*pmd).

>
> Thanks for the patch. This really simplify the code and avoid the unneeded overhead.
>
> >  out:
> >       spin_unlock(ptl);
> > -     if (do_unlock_folio)
> > -             folio_unlock(folio);
> > +
> >       /*
> >        * No need to double call mmu_notifier->invalidate_range() callback.
> >        * They are 3 cases to consider inside __split_huge_pmd_locked():
> >
>