[PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by Nico Pache 1 month, 2 weeks ago
The khugepaged daemon and madvise_collapse have two different
implementations that do almost the same thing.

Create collapse_single_pmd() to increase code reuse and provide a common
entry point for these two users.

Refactor madvise_collapse and collapse_scan_mm_slot to use the new
collapse_single_pmd function. This introduces a minor behavioral change
that addresses what is most likely an undiscovered bug: the current
implementation of khugepaged tests collapse_test_exit_or_disable before
calling try_collapse_pte_mapped_thp, but madvise_collapse was not doing
so. By unifying these two callers, madvise_collapse now also performs
this check. We also modify the return value to SCAN_ANY_PROCESS, which
properly indicates that this process is no longer valid to operate on.

We also guard the khugepaged_pages_collapsed variable to ensure it is
only incremented for khugepaged.

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
 mm/khugepaged.c | 121 ++++++++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 55 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index fa41480f6948..0839a781bedd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2395,6 +2395,62 @@ static enum scan_result collapse_scan_file(struct mm_struct *mm, unsigned long a
 	return result;
 }
 
+/*
+ * Try to collapse a single PMD starting at a PMD aligned addr, and return
+ * the results.
+ */
+static enum scan_result collapse_single_pmd(unsigned long addr,
+		struct vm_area_struct *vma, bool *mmap_locked,
+		struct collapse_control *cc)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	enum scan_result result;
+	struct file *file;
+	pgoff_t pgoff;
+
+	if (vma_is_anonymous(vma)) {
+		result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc);
+		goto end;
+	}
+
+	file = get_file(vma->vm_file);
+	pgoff = linear_page_index(vma, addr);
+
+	mmap_read_unlock(mm);
+	*mmap_locked = false;
+	result = collapse_scan_file(mm, addr, file, pgoff, cc);
+
+	if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
+	    mapping_can_writeback(file->f_mapping)) {
+		const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
+		const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+
+		filemap_write_and_wait_range(file->f_mapping, lstart, lend);
+	}
+	fput(file);
+
+	if (result != SCAN_PTE_MAPPED_HUGEPAGE)
+		goto end;
+
+	mmap_read_lock(mm);
+	*mmap_locked = true;
+	if (collapse_test_exit_or_disable(mm)) {
+		mmap_read_unlock(mm);
+		*mmap_locked = false;
+		return SCAN_ANY_PROCESS;
+	}
+	result = try_collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged);
+	if (result == SCAN_PMD_MAPPED)
+		result = SCAN_SUCCEED;
+	mmap_read_unlock(mm);
+	*mmap_locked = false;
+
+end:
+	if (cc->is_khugepaged && result == SCAN_SUCCEED)
+		++khugepaged_pages_collapsed;
+	return result;
+}
+
 static unsigned int collapse_scan_mm_slot(unsigned int pages, enum scan_result *result,
 					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
@@ -2466,34 +2522,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, enum scan_result *
 			VM_BUG_ON(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
-			if (!vma_is_anonymous(vma)) {
-				struct file *file = get_file(vma->vm_file);
-				pgoff_t pgoff = linear_page_index(vma,
-						khugepaged_scan.address);
-
-				mmap_read_unlock(mm);
-				mmap_locked = false;
-				*result = collapse_scan_file(mm,
-					khugepaged_scan.address, file, pgoff, cc);
-				fput(file);
-				if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
-					mmap_read_lock(mm);
-					if (collapse_test_exit_or_disable(mm))
-						goto breakouterloop;
-					*result = try_collapse_pte_mapped_thp(mm,
-						khugepaged_scan.address, false);
-					if (*result == SCAN_PMD_MAPPED)
-						*result = SCAN_SUCCEED;
-					mmap_read_unlock(mm);
-				}
-			} else {
-				*result = collapse_scan_pmd(mm, vma,
-					khugepaged_scan.address, &mmap_locked, cc);
-			}
-
-			if (*result == SCAN_SUCCEED)
-				++khugepaged_pages_collapsed;
 
+			*result = collapse_single_pmd(khugepaged_scan.address,
+						      vma, &mmap_locked, cc);
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
 			progress += HPAGE_PMD_NR;
@@ -2799,6 +2830,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 			cond_resched();
 			mmap_read_lock(mm);
 			mmap_locked = true;
+			*lock_dropped = true;
 			result = hugepage_vma_revalidate(mm, addr, false, &vma,
 							 cc);
 			if (result  != SCAN_SUCCEED) {
@@ -2809,46 +2841,25 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 			hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
 		}
 		mmap_assert_locked(mm);
-		if (!vma_is_anonymous(vma)) {
-			struct file *file = get_file(vma->vm_file);
-			pgoff_t pgoff = linear_page_index(vma, addr);
 
-			mmap_read_unlock(mm);
-			mmap_locked = false;
-			*lock_dropped = true;
-			result = collapse_scan_file(mm, addr, file, pgoff, cc);
-
-			if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
-			    mapping_can_writeback(file->f_mapping)) {
-				loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
-				loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+		result = collapse_single_pmd(addr, vma, &mmap_locked, cc);
 
-				filemap_write_and_wait_range(file->f_mapping, lstart, lend);
-				triggered_wb = true;
-				fput(file);
-				goto retry;
-			}
-			fput(file);
-		} else {
-			result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, cc);
-		}
 		if (!mmap_locked)
 			*lock_dropped = true;
 
-handle_result:
+		if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
+			triggered_wb = true;
+			goto retry;
+		}
+
 		switch (result) {
 		case SCAN_SUCCEED:
 		case SCAN_PMD_MAPPED:
 			++thps;
 			break;
-		case SCAN_PTE_MAPPED_HUGEPAGE:
-			BUG_ON(mmap_locked);
-			mmap_read_lock(mm);
-			result = try_collapse_pte_mapped_thp(mm, addr, true);
-			mmap_read_unlock(mm);
-			goto handle_result;
 		/* Whitelisted set of results where continuing OK */
 		case SCAN_NO_PTE_TABLE:
+		case SCAN_PTE_MAPPED_HUGEPAGE:
 		case SCAN_PTE_NON_PRESENT:
 		case SCAN_PTE_UFFD_WP:
 		case SCAN_LACK_REFERENCED_PAGE:
-- 
2.53.0
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by David Hildenbrand (Arm) 1 month, 2 weeks ago
On 2/12/26 03:25, Nico Pache wrote:
> <snip>
>
>   		if (!mmap_locked)
>   			*lock_dropped = true;
>   
> -handle_result:
> +		if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
> +			triggered_wb = true;
> +			goto retry;
> +		}

Having triggered_wb set where writeback is not actually triggered is 
suboptimal.

And you can tell that by realizing that you would now retry once even 
thought the mapping does not support writeback ( 
mapping_can_writeback(file->f_mapping)) and no writeback actually happened.

Further, we would also try to call filemap_write_and_wait_range() now 
twice instead of only during the first round.
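
For reference, the two pieces that interact here, reassembled from the
hunks above (simplified, not compile-tested):

	/* collapse_single_pmd(): writeback only runs when the mapping allows it */
	if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
	    mapping_can_writeback(file->f_mapping))
		filemap_write_and_wait_range(file->f_mapping, lstart, lend);	/* lstart/lend cover the PMD-sized range */

	/* madvise_collapse(): the retry is taken regardless of the above */
	if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
		triggered_wb = true;	/* set even when no writeback actually ran */
		goto retry;		/* and the retry may write back a second time */
	}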

-- 
Cheers,

David
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by Nico Pache 1 month, 2 weeks ago
On Thu, Feb 12, 2026 at 1:04 PM David Hildenbrand (Arm)
<david@kernel.org> wrote:
>
> On 2/12/26 03:25, Nico Pache wrote:
> > <snip>
> >
> > -handle_result:
> > +             if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
> > +                     triggered_wb = true;
> > +                     goto retry;
> > +             }
>
> Having triggered_wb set where writeback is not actually triggered is
> suboptimal.

It took me a second to figure out what you were referring to, but I see
it now: if we return SCAN_PAGE_DIRTY_OR_WRITEBACK but
mapping_can_writeback() fails, it still retries.

An appropriate solution would be to modify the return value when
mapping_can_writeback() fails, i.e.:

if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK) {
    if (mapping_can_writeback(file->f_mapping)) {
       const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
       const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;

       filemap_write_and_wait_range(file->f_mapping, lstart, lend);
    } else {
       result = SCAN_(SOMETHING?)
    }
}
fput(file);

We don't have an enum that fits this description, but we want one that
will continue.

Cheers!
-- Nico

Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by David Hildenbrand (Arm) 1 month, 1 week ago
On 2/12/26 21:26, Nico Pache wrote:
> On Thu, Feb 12, 2026 at 1:04 PM David Hildenbrand (Arm)
> <david@kernel.org> wrote:
>>
>> On 2/12/26 03:25, Nico Pache wrote:
>>> <snip>
>>>
>>> +             if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
>>> +                     triggered_wb = true;
>>> +                     goto retry;
>>> +             }
>>
>> Having triggered_wb set where writeback is not actually triggered is
>> suboptimal.
> 
> It took me a second to figure out what you were referring to, but I
> see it now. if we return SCAN_PAGE_D_OR_WB but the can_writeback fails
> it still retries.
> 
> Would be an appropriate solution if can_writeback fails to modify the
> return value.
> ie)
> 
> if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK) {
>      if (mapping_can_writeback(file->f_mapping)) {
>         const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
>         const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
> 
>         filemap_write_and_wait_range(file->f_mapping, lstart, lend);
>      } else {
>         result = SCAN_(SOMETHING?)
>      }
> }
> fput(file);
> 
> we dont have a enum that fits this description but we want one that
> will continue.

I stared at the patch and possible ways to change it, but I wondered 
whether this refactoring is really the right approach.

The whole mmap locking just makes this all very weird.

Let me think about it some more.

-- 
Cheers,

David
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by David Hildenbrand (Arm) 1 month, 1 week ago
On 2/23/26 15:24, David Hildenbrand (Arm) wrote:
> On 2/12/26 21:26, Nico Pache wrote:
>> On Thu, Feb 12, 2026 at 1:04 PM David Hildenbrand (Arm)
>> <david@kernel.org> wrote:
>>>
>>>
>>> Having triggered_wb set where writeback is not actually triggered is
>>> suboptimal.
>>
>> It took me a second to figure out what you were referring to, but I
>> see it now. if we return SCAN_PAGE_D_OR_WB but the can_writeback fails
>> it still retries.
>>
>> Would be an appropriate solution if can_writeback fails to modify the
>> return value.
>> ie)
>>
>> if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK) {
>>      if (mapping_can_writeback(file->f_mapping)) {
>>         const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
>>         const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
>>
>>         filemap_write_and_wait_range(file->f_mapping, lstart, lend);
>>      } else {
>>         result = SCAN_(SOMETHING?)
>>      }
>> }
>> fput(file);
>>
>> we dont have a enum that fits this description but we want one that
>> will continue.
> 
> I stared at the patch and possible ways to change it, but I wondered 
> whether this refactoring is really the right approach.
> 
> The whole mmap locking just makes this all very weird.
> 
> Let me think about it some more.
> 

As we don't really need to re-grab the MM lock, I think we can just do:

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 844a8a27845e..d546ff173833 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2395,6 +2395,7 @@ static enum scan_result collapse_single_pmd(unsigned long addr,
                 struct collapse_control *cc)
  {
         struct mm_struct *mm = vma->vm_mm;
+       bool triggered_wb = false;
         enum scan_result result;
         struct file *file;
         pgoff_t pgoff;
@@ -2409,14 +2410,22 @@ static enum scan_result collapse_single_pmd(unsigned long addr,
  
         mmap_read_unlock(mm);
         *mmap_locked = false;
+
+retry:
         result = collapse_scan_file(mm, addr, file, pgoff, cc);
  
+       /*
+        * For MADV_COLLAPSE, try to writeback once to retry the collapse
+        * immediately.
+        */
         if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
-           mapping_can_writeback(file->f_mapping)) {
+           !triggered_wb && mapping_can_writeback(file->f_mapping)) {
                 const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
                 const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
  
                 filemap_write_and_wait_range(file->f_mapping, lstart, lend);
+               triggered_wb = true;
+               goto retry;
         }
         fput(file);
  
@@ -2814,9 +2823,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
  
         for (addr = hstart; addr < hend; addr += HPAGE_PMD_SIZE) {
                 enum scan_result result = SCAN_FAIL;
-               bool triggered_wb = false;
  
-retry:
                 if (!mmap_locked) {
                         cond_resched();
                         mmap_read_lock(mm);
@@ -2838,11 +2845,6 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
                 if (!mmap_locked)
                         *lock_dropped = true;
  
-               if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
-                       triggered_wb = true;
-                       goto retry;
-               }
-
                 switch (result) {
                 case SCAN_SUCCEED:
                 case SCAN_PMD_MAPPED:
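
For readability, the file-backed path of collapse_single_pmd() with the
above applied would read roughly like this (hand-assembled from the two
diffs, not compile-tested):

	mmap_read_unlock(mm);
	*mmap_locked = false;

retry:
	result = collapse_scan_file(mm, addr, file, pgoff, cc);

	/*
	 * For MADV_COLLAPSE only: write the range back once and rescan
	 * immediately; khugepaged never waits for writeback here.
	 */
	if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
	    !triggered_wb && mapping_can_writeback(file->f_mapping)) {
		const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
		const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;

		filemap_write_and_wait_range(file->f_mapping, lstart, lend);
		triggered_wb = true;
		goto retry;
	}
	fput(file);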


-- 
Cheers,

David
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by Lance Yang 1 month, 2 weeks ago

On 2026/2/13 04:26, Nico Pache wrote:
> On Thu, Feb 12, 2026 at 1:04 PM David Hildenbrand (Arm)
> <david@kernel.org> wrote:
>>
>> On 2/12/26 03:25, Nico Pache wrote:
>>> <snip>
>>>
>>> +             if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
>>> +                     triggered_wb = true;
>>> +                     goto retry;
>>> +             }
>>
>> Having triggered_wb set where writeback is not actually triggered is
>> suboptimal.

Good catch!

> 
> It took me a second to figure out what you were referring to, but I
> see it now. if we return SCAN_PAGE_D_OR_WB but the can_writeback fails
> it still retries.
> 
> Would be an appropriate solution if can_writeback fails to modify the
> return value.
> ie)

Yep, we're on the right track. IIRC, David's concern has two parts:

1) Avoid retry when writeback wasn't actually triggered
    (mapping_can_writeback() fails)
2) Avoid calling filemap_write_and_wait_range() twice on retry

The proposed approach below addresses #1, but we still need to tackle #2.

The issue is that on the retry, collapse_single_pmd() doesn't know that
writeback was already performed in the previous round, so it could call
filemap_write_and_wait_range() again if the page is still dirty.
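
To make #2 concrete, the second round as the patch stands would run
roughly as follows (traced from the hunks above, simplified):

	/*
	 * madvise_collapse(), retry round (triggered_wb already true):
	 *   collapse_single_pmd()
	 *     -> collapse_scan_file() may still return SCAN_PAGE_DIRTY_OR_WRITEBACK
	 *     -> mapping_can_writeback() is still true
	 *     -> filemap_write_and_wait_range() runs a second time
	 *   the caller then sees !triggered_wb as false, so no third round
	 */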

> 
> if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK) {
>      if (mapping_can_writeback(file->f_mapping)) {
>         const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
>         const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
> 
>         filemap_write_and_wait_range(file->f_mapping, lstart, lend);
>      } else {
>         result = SCAN_(SOMETHING?)
>      }
> }
> fput(file);
> 
> we dont have a enum that fits this description but we want one that
> will continue.
> 
> Cheers!
> -- Nico
> 
>>
>> And you can tell that by realizing that you would now retry once even
>> thought the mapping does not support writeback (
>> mapping_can_writeback(file->f_mapping)) and no writeback actually happened.
>>
>> Further, we would also try to call filemap_write_and_wait_range() now
>> twice instead of only during the first round.

Right. Let's avoid calling it twice.


Cheers,
Lance
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by Zi Yan 1 month, 2 weeks ago
On 11 Feb 2026, at 21:25, Nico Pache wrote:

> The khugepaged daemon and madvise_collapse have two different
> implementations that do almost the same thing.
>

<snip>

> @@ -2799,6 +2830,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
>  			cond_resched();
>  			mmap_read_lock(mm);
>  			mmap_locked = true;
> +			*lock_dropped = true;

Is this needed?

1. There is one above handle_result;
2. mmap_locked is true when entering madvise_collapse(), so *lock_dropped would
   change only after one iteration and the one below should take care of it;
3. goto retry is moved below “*lock_dropped = true”.

Let me know if I miss anything.

>  			result = hugepage_vma_revalidate(mm, addr, false, &vma,
>  							 cc);
>  			if (result  != SCAN_SUCCEED) {
> @@ -2809,46 +2841,25 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
>  			hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
>  		}
>  		mmap_assert_locked(mm);
> -		if (!vma_is_anonymous(vma)) {
> -			struct file *file = get_file(vma->vm_file);
> -			pgoff_t pgoff = linear_page_index(vma, addr);
>
> -			mmap_read_unlock(mm);
> -			mmap_locked = false;
> -			*lock_dropped = true;
> -			result = collapse_scan_file(mm, addr, file, pgoff, cc);
> -
> -			if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
> -			    mapping_can_writeback(file->f_mapping)) {
> -				loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
> -				loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
> +		result = collapse_single_pmd(addr, vma, &mmap_locked, cc);
>
> -				filemap_write_and_wait_range(file->f_mapping, lstart, lend);
> -				triggered_wb = true;
> -				fput(file);
> -				goto retry;
> -			}
> -			fput(file);
> -		} else {
> -			result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, cc);
> -		}
>  		if (!mmap_locked)
>  			*lock_dropped = true;
>
> -handle_result:
> +		if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) {
> +			triggered_wb = true;
> +			goto retry;
> +		}
> +
>  		switch (result) {
>  		case SCAN_SUCCEED:
>  		case SCAN_PMD_MAPPED:
>  			++thps;
>  			break;
> -		case SCAN_PTE_MAPPED_HUGEPAGE:
> -			BUG_ON(mmap_locked);
> -			mmap_read_lock(mm);
> -			result = try_collapse_pte_mapped_thp(mm, addr, true);
> -			mmap_read_unlock(mm);
> -			goto handle_result;
>  		/* Whitelisted set of results where continuing OK */
>  		case SCAN_NO_PTE_TABLE:
> +		case SCAN_PTE_MAPPED_HUGEPAGE:
>  		case SCAN_PTE_NON_PRESENT:
>  		case SCAN_PTE_UFFD_WP:
>  		case SCAN_LACK_REFERENCED_PAGE:
> -- 
> 2.53.0

Otherwise, LGTM.

Reviewed-by: Zi Yan <ziy@nvidia.com>

Best Regards,
Yan, Zi
Re: [PATCH mm-unstable v1 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
Posted by Pedro Falcato 1 month, 2 weeks ago
On Wed, Feb 11, 2026 at 07:25:12PM -0700, Nico Pache wrote:
> The khugepaged daemon and madvise_collapse have two different
> implementations that do almost the same thing.
> 
> <snip>
> 
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Signed-off-by: Nico Pache <npache@redhat.com>

Acked-by: Pedro Falcato <pfalcato@suse.de>

-- 
Pedro