[PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()

Posted by Qi Zheng 2 months ago
In collapse_pte_mapped_thp(), we may modify the pte and pmd entries
after acquiring the ptl, so convert it to using
pte_offset_map_rw_nolock(). Since no pte_same() check is performed
here after the ptl is taken, we must record pgt_pmd at map time and
re-validate it with pmd_same() once the ptl is held.
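
The resulting pattern (a minimal sketch of the rule this series
establishes for rw_nolock users, not the actual khugepaged code):

	pmd_t pmdval;
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pmdval, &ptl);
	if (!pte)
		return;
	spin_lock(ptl);
	/*
	 * The PTE page may have been freed and reused between the
	 * lockless map above and taking ptl, so re-validate pmdval
	 * before modifying any pte or pmd entries.
	 */
	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(pte, ptl);
		return;
	}
	/* ... pte and pmd entries may be modified safely here ... */
	pte_unmap_unlock(pte, ptl);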

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 mm/khugepaged.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6498721d4783a..8ab79c13d077f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1605,7 +1605,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
 		pml = pmd_lock(mm, pmd);
 
-	start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
+	start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl);
 	if (!start_pte)		/* mmap_lock + page lock should prevent this */
 		goto abort;
 	if (!pml)
@@ -1613,6 +1613,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	else if (ptl != pml)
 		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
+	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd))))
+		goto abort;
+
 	/* step 2: clear page table and adjust rmap */
 	for (i = 0, addr = haddr, pte = start_pte;
 	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
@@ -1645,7 +1648,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 		nr_ptes++;
 	}
 
-	pte_unmap(start_pte);
 	if (!pml)
 		spin_unlock(ptl);
 
@@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	/* step 4: remove empty page table */
 	if (!pml) {
 		pml = pmd_lock(mm, pmd);
-		if (ptl != pml)
+		if (ptl != pml) {
 			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+			if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
+				spin_unlock(pml);
+				goto abort;
+			}
+		}
 	}
 	pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
 	pmdp_get_lockless_sync();
 	if (ptl != pml)
 		spin_unlock(ptl);
+	pte_unmap(start_pte);
 	spin_unlock(pml);
 
 	mmu_notifier_invalidate_range_end(&range);
-- 
2.20.1
Re: [PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()
Posted by Muchun Song 2 months ago

> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> [...]
> @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>    /* step 4: remove empty page table */
>    if (!pml) {
>        pml = pmd_lock(mm, pmd);
> -        if (ptl != pml)
> +        if (ptl != pml) {
>            spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
> +            if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
> +                spin_unlock(pml);
> +                goto abort;

This will drop the folio reference and the mm counter twice: step 3 has already dropped them, and the abort label will drop them again.
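
For reference, the abort path at this point looks roughly like this
(paraphrased from mm/khugepaged.c around this series' base, abridged):

abort:
	if (nr_ptes) {
		flush_tlb_mm(mm);
		folio_ref_sub(folio, nr_ptes);
		add_mm_counter(mm, mm_counter_file(folio), -nr_ptes);
	}
	/* ... unmap, unlock, tear down the notifier, drop the folio ... */

Step 3 has already done the folio_ref_sub() and add_mm_counter() for
the cleared ptes, so jumping to abort with nr_ptes still non-zero
repeats both.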

> +            }
> +        }
>    }
>    pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
>    pmdp_get_lockless_sync();
>    if (ptl != pml)
>        spin_unlock(ptl);
> +    pte_unmap(start_pte);
>    spin_unlock(pml);

Why not?

pte_unmap_unlock(start_pte, ptl);
if (pml != ptl)
        spin_unlock(pml);

Re: [PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()
Posted by Qi Zheng 2 months ago

On 2024/9/24 15:14, Muchun Song wrote:
> 
> 
>> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>> [...]
>> @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>>     /* step 4: remove empty page table */
>>     if (!pml) {
>>         pml = pmd_lock(mm, pmd);
>> -        if (ptl != pml)
>> +        if (ptl != pml) {
>>             spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
>> +            if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
>> +                spin_unlock(pml);
>> +                goto abort;
> 
> This will drop the folio reference and the mm counter twice: step 3 has already dropped them, and the abort label will drop them again.

My bad, should set nr_ptes to 0 and call flush_tlb_mm() here, right?
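
I.e. something like this (a sketch of the idea, on top of this patch):

	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
		spin_unlock(pml);
		/* refs and the mm counter were already dropped in step 3 */
		nr_ptes = 0;
		/* but the ptes cleared in step 2 still need a TLB flush */
		flush_tlb_mm(mm);
		goto abort;
	}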

> 
>> +            }
>> +        }
>>     }
>>     pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
>>     pmdp_get_lockless_sync();
>>     if (ptl != pml)
>>         spin_unlock(ptl);
>> +    pte_unmap(start_pte);
>>     spin_unlock(pml);
> 
> Why not?
> 
> pte_unmap_unlock(start_pte, ptl);
> if (pml != ptl)
>          spin_unlock(pml);

Both are fine, will do.

Thanks,
Qi

Re: [PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()
Posted by Muchun Song 2 months ago

> On Sep 24, 2024, at 15:29, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
> 
> 
> On 2024/9/24 15:14, Muchun Song wrote:
>>> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>>> [...]
>>> @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>>>    /* step 4: remove empty page table */
>>>    if (!pml) {
>>>        pml = pmd_lock(mm, pmd);
>>> -        if (ptl != pml)
>>> +        if (ptl != pml) {
>>>            spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
>>> +            if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
>>> +                spin_unlock(pml);
>>> +                goto abort;
>> This will drop the folio reference and the mm counter twice: step 3 has already dropped them, and the abort label will drop them again.
> 
> My bad, should set nr_ptes to 0 and call flush_tlb_mm() here, right?

Or add a new label "out" just below the "abort". Then go to out.
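
I.e. (a sketch, assuming the label sits right after abort's rollback
block):

abort:
	if (nr_ptes) {
		flush_tlb_mm(mm);
		folio_ref_sub(folio, nr_ptes);
		add_mm_counter(mm, mm_counter_file(folio), -nr_ptes);
	}
out:
	if (start_pte)
		pte_unmap_unlock(start_pte, ptl);
	/* ... unlock pml, tear down the notifier, drop the folio ... */

Then the step-4 pmd_same() failure can "goto out" and skip the
rollback that step 3 has already performed.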

Re: [PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()
Posted by Qi Zheng 2 months ago

On 2024/9/24 16:52, Muchun Song wrote:
> 
> 
>> On Sep 24, 2024, at 15:29, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>>
>>
>>
>> On 2024/9/24 15:14, Muchun Song wrote:
>>>> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>>>> [...]
>>>> @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>>>>     /* step 4: remove empty page table */
>>>>     if (!pml) {
>>>>         pml = pmd_lock(mm, pmd);
>>>> -        if (ptl != pml)
>>>> +        if (ptl != pml) {
>>>>             spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
>>>> +            if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
>>>> +                spin_unlock(pml);
>>>> +                goto abort;
>>> This will drop the folio reference and the mm counter twice: step 3 has already dropped them, and the abort label will drop them again.
>>
>> My bad, should set nr_ptes to 0 and call flush_tlb_mm() here, right?
> 
> Or add a new label "out" just below the "abort". Then go to out.

With that approach, we still need to call flush_tlb_mm() first, like
the following:

if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
	spin_unlock(pml);
	flush_tlb_mm(mm);
	goto out;
}

Re: [PATCH v4 07/13] mm: khugepaged: collapse_pte_mapped_thp() use pte_offset_map_rw_nolock()
Posted by Muchun Song 2 months ago

> On Sep 24, 2024, at 16:57, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
> 
> 
> On 2024/9/24 16:52, Muchun Song wrote:
>>> On Sep 24, 2024, at 15:29, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>>> 
>>> 
>>> 
>>> On 2024/9/24 15:14, Muchun Song wrote:
>>>>> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
>>>>> [...]
>>>>> @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>>>>>    /* step 4: remove empty page table */
>>>>>    if (!pml) {
>>>>>        pml = pmd_lock(mm, pmd);
>>>>> -        if (ptl != pml)
>>>>> +        if (ptl != pml) {
>>>>>            spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
>>>>> +            if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
>>>>> +                spin_unlock(pml);
>>>>> +                goto abort;
>>>> This will drop the folio reference and the mm counter twice: step 3 has already dropped them, and the abort label will drop them again.
>>> 
>>> My bad, should set nr_ptes to 0 and call flush_tlb_mm() here, right?
>> Or add a new label "out" just below the "abort". Then go to out.
> 
> With that approach, we still need to call flush_tlb_mm() first, like
> the following:
> 
> if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
> 	spin_unlock(pml);
> 	flush_tlb_mm(mm);
> 	goto out;
> }

Fine.
