The khugepaged daemon and madvise_collapse have two different
implementations that do almost the same thing.
Create collapse_single_pmd to increase code reuse and create an entry
point to these two users.
Refactor madvise_collapse and collapse_scan_mm_slot to use the new
collapse_single_pmd function. This introduces a minor behavioral change
that is most likely an undiscovered bug. The current implementation of
khugepaged tests collapse_test_exit_or_disable before calling
collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse
case. By unifying these two callers madvise_collapse now also performs
this check.
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
mm/khugepaged.c | 94 ++++++++++++++++++++++++++-----------------------
1 file changed, 49 insertions(+), 45 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0e7bbadf03ee..b7b98aebb670 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2382,6 +2382,50 @@ static int collapse_scan_file(struct mm_struct *mm, unsigned long addr,
return result;
}
+/*
+ * Try to collapse a single PMD starting at a PMD aligned addr, and return
+ * the results.
+ */
+static int collapse_single_pmd(unsigned long addr,
+ struct vm_area_struct *vma, bool *mmap_locked,
+ struct collapse_control *cc)
+{
+ int result = SCAN_FAIL;
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (!vma_is_anonymous(vma)) {
+ struct file *file = get_file(vma->vm_file);
+ pgoff_t pgoff = linear_page_index(vma, addr);
+
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+ result = collapse_scan_file(mm, addr, file, pgoff, cc);
+ fput(file);
+ if (result == SCAN_PTE_MAPPED_HUGEPAGE) {
+ mmap_read_lock(mm);
+ *mmap_locked = true;
+ if (collapse_test_exit_or_disable(mm)) {
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+ result = SCAN_ANY_PROCESS;
+ goto end;
+ }
+ result = collapse_pte_mapped_thp(mm, addr,
+ !cc->is_khugepaged);
+ if (result == SCAN_PMD_MAPPED)
+ result = SCAN_SUCCEED;
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+ }
+ } else {
+ result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc);
+ }
+ if (cc->is_khugepaged && result == SCAN_SUCCEED)
+ ++khugepaged_pages_collapsed;
+end:
+ return result;
+}
+
static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result,
struct collapse_control *cc)
__releases(&khugepaged_mm_lock)
@@ -2455,34 +2499,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result,
VM_BUG_ON(khugepaged_scan.address < hstart ||
khugepaged_scan.address + HPAGE_PMD_SIZE >
hend);
- if (!vma_is_anonymous(vma)) {
- struct file *file = get_file(vma->vm_file);
- pgoff_t pgoff = linear_page_index(vma,
- khugepaged_scan.address);
-
- mmap_read_unlock(mm);
- mmap_locked = false;
- *result = collapse_scan_file(mm,
- khugepaged_scan.address, file, pgoff, cc);
- fput(file);
- if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
- mmap_read_lock(mm);
- if (collapse_test_exit_or_disable(mm))
- goto breakouterloop;
- *result = collapse_pte_mapped_thp(mm,
- khugepaged_scan.address, false);
- if (*result == SCAN_PMD_MAPPED)
- *result = SCAN_SUCCEED;
- mmap_read_unlock(mm);
- }
- } else {
- *result = collapse_scan_pmd(mm, vma,
- khugepaged_scan.address, &mmap_locked, cc);
- }
-
- if (*result == SCAN_SUCCEED)
- ++khugepaged_pages_collapsed;
+ *result = collapse_single_pmd(khugepaged_scan.address,
+ vma, &mmap_locked, cc);
/* move to next address */
khugepaged_scan.address += HPAGE_PMD_SIZE;
progress += HPAGE_PMD_NR;
@@ -2799,34 +2818,19 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
- if (!vma_is_anonymous(vma)) {
- struct file *file = get_file(vma->vm_file);
- pgoff_t pgoff = linear_page_index(vma, addr);
- mmap_read_unlock(mm);
- mmap_locked = false;
- result = collapse_scan_file(mm, addr, file, pgoff, cc);
- fput(file);
- } else {
- result = collapse_scan_pmd(mm, vma, addr,
- &mmap_locked, cc);
- }
+ result = collapse_single_pmd(addr, vma, &mmap_locked, cc);
+
if (!mmap_locked)
*lock_dropped = true;
-handle_result:
switch (result) {
case SCAN_SUCCEED:
case SCAN_PMD_MAPPED:
++thps;
break;
- case SCAN_PTE_MAPPED_HUGEPAGE:
- BUG_ON(mmap_locked);
- mmap_read_lock(mm);
- result = collapse_pte_mapped_thp(mm, addr, true);
- mmap_read_unlock(mm);
- goto handle_result;
/* Whitelisted set of results where continuing OK */
+ case SCAN_PTE_MAPPED_HUGEPAGE:
case SCAN_PMD_NULL:
case SCAN_PTE_NON_PRESENT:
case SCAN_PTE_UFFD_WP:
--
2.50.1
On Tue, Aug 19, 2025 at 07:41:54AM -0600, Nico Pache wrote: > The khugepaged daemon and madvise_collapse have two different > implementations that do almost the same thing. > > Create collapse_single_pmd to increase code reuse and create an entry > point to these two users. > > Refactor madvise_collapse and collapse_scan_mm_slot to use the new > collapse_single_pmd function. This introduces a minor behavioral change > that is most likely an undiscovered bug. The current implementation of > khugepaged tests collapse_test_exit_or_disable before calling > collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse > case. By unifying these two callers madvise_collapse now also performs > this check. > > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > Acked-by: David Hildenbrand <david@redhat.com> > Signed-off-by: Nico Pache <npache@redhat.com> > --- > mm/khugepaged.c | 94 ++++++++++++++++++++++++++----------------------- > 1 file changed, 49 insertions(+), 45 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 0e7bbadf03ee..b7b98aebb670 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -2382,6 +2382,50 @@ static int collapse_scan_file(struct mm_struct *mm, unsigned long addr, > return result; > } > > +/* > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > + * the results. > + */ > +static int collapse_single_pmd(unsigned long addr, > + struct vm_area_struct *vma, bool *mmap_locked, > + struct collapse_control *cc) > +{ > + int result = SCAN_FAIL; You assign result in all branches, so this can be uninitialised. > + struct mm_struct *mm = vma->vm_mm; > + > + if (!vma_is_anonymous(vma)) { > + struct file *file = get_file(vma->vm_file); > + pgoff_t pgoff = linear_page_index(vma, addr); > + > + mmap_read_unlock(mm); > + *mmap_locked = false; > + result = collapse_scan_file(mm, addr, file, pgoff, cc); > + fput(file); > + if (result == SCAN_PTE_MAPPED_HUGEPAGE) { > + mmap_read_lock(mm); > + *mmap_locked = true; > + if (collapse_test_exit_or_disable(mm)) { > + mmap_read_unlock(mm); > + *mmap_locked = false; > + result = SCAN_ANY_PROCESS; > + goto end; Don't love that in e.g. collapse_scan_mm_slot() we are using the mmap lock being disabled as in effect an error code. Is SCAN_ANY_PROCESS correct here? Because in collapse_scan_mm_slot() you'd previously: if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { mmap_read_lock(mm); if (collapse_test_exit_or_disable(mm)) goto breakouterloop; ... } But now you're setting result = SCAN_ANY_PROCESS rather than SCAN_PTE_MAPPED_HUGEPAGE in this instance? You don't mention that you're changing this, or at least explicitly enough, the commit message should state that you're changing this and explain why it's ok. This whole file is horrid, and it's kinda an aside, but I really wish we had some comment going through each of the scan_result cases and explaining what each one meant. Also I think: return SCAN_ANY_PROCESS; Is better than: result = SCAN_ANY_PROCESS; goto end; ... end: return result; > + } > + result = collapse_pte_mapped_thp(mm, addr, > + !cc->is_khugepaged); Hm another change here, in the original code in collapse_scan_mm_slot() this is: *result = collapse_pte_mapped_thp(mm, khugepaged_scan.address, false); Presumably collapse_scan_mm_slot() is only ever invoked with cc->is_khugepaged? Maybe worth adding a VM_WARN_ON_ONCE(!cc->is_khugepaged) at the top of collapse_scan_mm_slot() to assert this (and other places where your change assumes this to be the case). > + if (result == SCAN_PMD_MAPPED) > + result = SCAN_SUCCEED; > + mmap_read_unlock(mm); > + *mmap_locked = false; > + } > + } else { > + result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > + } > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > + ++khugepaged_pages_collapsed; Similarly, presumably because collapse_scan_mm_slot() only ever invoked khugepaged case this didn't have the cc->is_khugepaged check? > +end: > + return result; > +} There's a LOT of nesting going on here, I think we can simplify this a lot. If we make the change I noted above re: returning SCAN_ANY_PROCESS< we can move the end label up a bit and avoid a ton of nesting, e.g.: static int collapse_single_pmd(unsigned long addr, struct vm_area_struct *vma, bool *mmap_locked, struct collapse_control *cc) { struct mm_struct *mm = vma->vm_mm; struct file *file; pgoff_t pgoff; int result; if (vma_is_anonymous(vma)) { result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); goto end: } file = get_file(vma->vm_file); pgoff = linear_page_index(vma, addr); mmap_read_unlock(mm); *mmap_locked = false; result = collapse_scan_file(mm, addr, file, pgoff, cc); fput(file); if (result != SCAN_PTE_MAPPED_HUGEPAGE) goto end; mmap_read_lock(mm); *mmap_locked = true; if (collapse_test_exit_or_disable(mm)) { mmap_read_unlock(mm); *mmap_locked = false; return SCAN_ANY_PROCESS; } result = collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged); if (result == SCAN_PMD_MAPPED) result = SCAN_SUCCEED; mmap_read_unlock(mm); *mmap_locked = false; end: if (cc->is_khugepaged && result == SCAN_SUCCEED) ++khugepaged_pages_collapsed; return result; } (untested, thrown together so do double check!) > + > static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > struct collapse_control *cc) > __releases(&khugepaged_mm_lock) > @@ -2455,34 +2499,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > VM_BUG_ON(khugepaged_scan.address < hstart || > khugepaged_scan.address + HPAGE_PMD_SIZE > > hend); > - if (!vma_is_anonymous(vma)) { > - struct file *file = get_file(vma->vm_file); > - pgoff_t pgoff = linear_page_index(vma, > - khugepaged_scan.address); > - > - mmap_read_unlock(mm); > - mmap_locked = false; > - *result = collapse_scan_file(mm, > - khugepaged_scan.address, file, pgoff, cc); > - fput(file); > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > - mmap_read_lock(mm); > - if (collapse_test_exit_or_disable(mm)) > - goto breakouterloop; > - *result = collapse_pte_mapped_thp(mm, > - khugepaged_scan.address, false); > - if (*result == SCAN_PMD_MAPPED) > - *result = SCAN_SUCCEED; > - mmap_read_unlock(mm); > - } > - } else { > - *result = collapse_scan_pmd(mm, vma, > - khugepaged_scan.address, &mmap_locked, cc); > - } > - > - if (*result == SCAN_SUCCEED) > - ++khugepaged_pages_collapsed; > > + *result = collapse_single_pmd(khugepaged_scan.address, > + vma, &mmap_locked, cc); > /* move to next address */ > khugepaged_scan.address += HPAGE_PMD_SIZE; > progress += HPAGE_PMD_NR; > @@ -2799,34 +2818,19 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start, > mmap_assert_locked(mm); > memset(cc->node_load, 0, sizeof(cc->node_load)); > nodes_clear(cc->alloc_nmask); > - if (!vma_is_anonymous(vma)) { > - struct file *file = get_file(vma->vm_file); > - pgoff_t pgoff = linear_page_index(vma, addr); > > - mmap_read_unlock(mm); > - mmap_locked = false; > - result = collapse_scan_file(mm, addr, file, pgoff, cc); > - fput(file); > - } else { > - result = collapse_scan_pmd(mm, vma, addr, > - &mmap_locked, cc); > - } > + result = collapse_single_pmd(addr, vma, &mmap_locked, cc); > + Ack the fact you noted the behaviour change re: collapse_test_exit_or_disable() that seems fine. > if (!mmap_locked) > *lock_dropped = true; > > -handle_result: > switch (result) { > case SCAN_SUCCEED: > case SCAN_PMD_MAPPED: > ++thps; > break; > - case SCAN_PTE_MAPPED_HUGEPAGE: > - BUG_ON(mmap_locked); > - mmap_read_lock(mm); > - result = collapse_pte_mapped_thp(mm, addr, true); > - mmap_read_unlock(mm); > - goto handle_result; One thing that differs with new code her is we filter SCAN_PMD_MAPPED to SCAN_SUCCEED. I was about to say 'but ++thps - is this correct' but now I realise this was looping back on itself with a goto to do just that... ugh ye gads. Anwyay that's fine because it doesn't change anything. Re: switch statement in general, again would be good to always have each scan possibility in switch statements, but perhaps given so many not practical :) (that way the compiler warns on missing a newly added enum val) > /* Whitelisted set of results where continuing OK */ > + case SCAN_PTE_MAPPED_HUGEPAGE: > case SCAN_PMD_NULL: > case SCAN_PTE_NON_PRESENT: > case SCAN_PTE_UFFD_WP: > -- > 2.50.1 >
On Wed, Aug 20, 2025 at 5:22 AM Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > On Tue, Aug 19, 2025 at 07:41:54AM -0600, Nico Pache wrote: > > The khugepaged daemon and madvise_collapse have two different > > implementations that do almost the same thing. > > > > Create collapse_single_pmd to increase code reuse and create an entry > > point to these two users. > > > > Refactor madvise_collapse and collapse_scan_mm_slot to use the new > > collapse_single_pmd function. This introduces a minor behavioral change > > that is most likely an undiscovered bug. The current implementation of > > khugepaged tests collapse_test_exit_or_disable before calling > > collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse > > case. By unifying these two callers madvise_collapse now also performs > > this check. > > > > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > > Acked-by: David Hildenbrand <david@redhat.com> > > Signed-off-by: Nico Pache <npache@redhat.com> > > --- > > mm/khugepaged.c | 94 ++++++++++++++++++++++++++----------------------- > > 1 file changed, 49 insertions(+), 45 deletions(-) > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > index 0e7bbadf03ee..b7b98aebb670 100644 > > --- a/mm/khugepaged.c > > +++ b/mm/khugepaged.c > > @@ -2382,6 +2382,50 @@ static int collapse_scan_file(struct mm_struct *mm, unsigned long addr, > > return result; > > } > > > > +/* > > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > > + * the results. > > + */ > > +static int collapse_single_pmd(unsigned long addr, > > + struct vm_area_struct *vma, bool *mmap_locked, > > + struct collapse_control *cc) > > +{ > > + int result = SCAN_FAIL; > > You assign result in all branches, so this can be uninitialised. ack, thanks. > > > + struct mm_struct *mm = vma->vm_mm; > > + > > + if (!vma_is_anonymous(vma)) { > > + struct file *file = get_file(vma->vm_file); > > + pgoff_t pgoff = linear_page_index(vma, addr); > > + > > + mmap_read_unlock(mm); > > + *mmap_locked = false; > > + result = collapse_scan_file(mm, addr, file, pgoff, cc); > > + fput(file); > > + if (result == SCAN_PTE_MAPPED_HUGEPAGE) { > > + mmap_read_lock(mm); > > + *mmap_locked = true; > > + if (collapse_test_exit_or_disable(mm)) { > > + mmap_read_unlock(mm); > > + *mmap_locked = false; > > + result = SCAN_ANY_PROCESS; > > + goto end; > > Don't love that in e.g. collapse_scan_mm_slot() we are using the mmap lock being > disabled as in effect an error code. > > Is SCAN_ANY_PROCESS correct here? Because in collapse_scan_mm_slot() you'd > previously: https://lore.kernel.org/lkml/a881ed65-351a-469f-b625-a3066d0f1d5c@linux.alibaba.com/ Baolin brought up a good point a while back that if collapse_test_exit_or_disable returns true we will be breaking out of the loop and should change the return value to indicate this. So to combine the madvise breakout and the scan_slot breakout we drop the lock and return SCAN_ANY_PROCESS. > > if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > mmap_read_lock(mm); > if (collapse_test_exit_or_disable(mm)) > goto breakouterloop; > ... > } > > But now you're setting result = SCAN_ANY_PROCESS rather than > SCAN_PTE_MAPPED_HUGEPAGE in this instance? > > You don't mention that you're changing this, or at least explicitly enough, > the commit message should state that you're changing this and explain why > it's ok. I do state it but perhaps I need to be more verbose! I will update the message to state we are also changing the result value too. > > This whole file is horrid, and it's kinda an aside, but I really wish we > had some comment going through each of the scan_result cases and explaining > what each one meant. Yeah its been a huge pain to have to investigate what everything is supposed to mean, and I often have to go searching to confirm things. include/trace/events/huge_memory.h has a "good" summary of them > > Also I think: > > return SCAN_ANY_PROCESS; > > Is better than: > > result = SCAN_ANY_PROCESS; > goto end; I agree! I will change that :) > ... > end: > return result; > > > + } > > + result = collapse_pte_mapped_thp(mm, addr, > > + !cc->is_khugepaged); > > Hm another change here, in the original code in collapse_scan_mm_slot() > this is: > > *result = collapse_pte_mapped_thp(mm, > khugepaged_scan.address, false); > > Presumably collapse_scan_mm_slot() is only ever invoked with > cc->is_khugepaged? Correct, but the madvise_collapse calls this with true, hence why it now depends on the is_khugepaged variable. No functional change here. > > Maybe worth adding a VM_WARN_ON_ONCE(!cc->is_khugepaged) at the top of > collapse_scan_mm_slot() to assert this (and other places where your change > assumes this to be the case). Ok I will investigate doing that but it would take a huge mistake to hit that assertion. > > > > + if (result == SCAN_PMD_MAPPED) > > + result = SCAN_SUCCEED; > > + mmap_read_unlock(mm); > > + *mmap_locked = false; > > + } > > + } else { > > + result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > > + } > > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > > + ++khugepaged_pages_collapsed; > > Similarly, presumably because collapse_scan_mm_slot() only ever invoked > khugepaged case this didn't have the cc->is_khugepaged check? Correct, we only increment this when its khugepaged, so we need to guard it so madvise collapse wont increment this. > > > +end: > > + return result; > > +} > > There's a LOT of nesting going on here, I think we can simplify this a > lot. If we make the change I noted above re: returning SCAN_ANY_PROCESS< we > can move the end label up a bit and avoid a ton of nesting, e.g.: Ah I like this much more, I will try to implement/test it. > > static int collapse_single_pmd(unsigned long addr, > struct vm_area_struct *vma, bool *mmap_locked, > struct collapse_control *cc) > { > struct mm_struct *mm = vma->vm_mm; > struct file *file; > pgoff_t pgoff; > int result; > > if (vma_is_anonymous(vma)) { > result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > goto end: > } > > file = get_file(vma->vm_file); > pgoff = linear_page_index(vma, addr); > > mmap_read_unlock(mm); > *mmap_locked = false; > result = collapse_scan_file(mm, addr, file, pgoff, cc); > fput(file); > if (result != SCAN_PTE_MAPPED_HUGEPAGE) > goto end; > > mmap_read_lock(mm); > *mmap_locked = true; > if (collapse_test_exit_or_disable(mm)) { > mmap_read_unlock(mm); > *mmap_locked = false; > return SCAN_ANY_PROCESS; > } > result = collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged); > if (result == SCAN_PMD_MAPPED) > result = SCAN_SUCCEED; > mmap_read_unlock(mm); > *mmap_locked = false; > > end: > if (cc->is_khugepaged && result == SCAN_SUCCEED) > ++khugepaged_pages_collapsed; > > return result; > } > > (untested, thrown together so do double check!) > > > + > > static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > struct collapse_control *cc) > > __releases(&khugepaged_mm_lock) > > @@ -2455,34 +2499,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > VM_BUG_ON(khugepaged_scan.address < hstart || > > khugepaged_scan.address + HPAGE_PMD_SIZE > > > hend); > > - if (!vma_is_anonymous(vma)) { > > - struct file *file = get_file(vma->vm_file); > > - pgoff_t pgoff = linear_page_index(vma, > > - khugepaged_scan.address); > > - > > - mmap_read_unlock(mm); > > - mmap_locked = false; > > - *result = collapse_scan_file(mm, > > - khugepaged_scan.address, file, pgoff, cc); > > - fput(file); > > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > - mmap_read_lock(mm); > > - if (collapse_test_exit_or_disable(mm)) > > - goto breakouterloop; > > - *result = collapse_pte_mapped_thp(mm, > > - khugepaged_scan.address, false); > > - if (*result == SCAN_PMD_MAPPED) > > - *result = SCAN_SUCCEED; > > - mmap_read_unlock(mm); > > - } > > - } else { > > - *result = collapse_scan_pmd(mm, vma, > > - khugepaged_scan.address, &mmap_locked, cc); > > - } > > - > > - if (*result == SCAN_SUCCEED) > > - ++khugepaged_pages_collapsed; > > > > + *result = collapse_single_pmd(khugepaged_scan.address, > > + vma, &mmap_locked, cc); > > /* move to next address */ > > khugepaged_scan.address += HPAGE_PMD_SIZE; > > progress += HPAGE_PMD_NR; > > @@ -2799,34 +2818,19 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start, > > mmap_assert_locked(mm); > > memset(cc->node_load, 0, sizeof(cc->node_load)); > > nodes_clear(cc->alloc_nmask); > > - if (!vma_is_anonymous(vma)) { > > - struct file *file = get_file(vma->vm_file); > > - pgoff_t pgoff = linear_page_index(vma, addr); > > > > - mmap_read_unlock(mm); > > - mmap_locked = false; > > - result = collapse_scan_file(mm, addr, file, pgoff, cc); > > - fput(file); > > - } else { > > - result = collapse_scan_pmd(mm, vma, addr, > > - &mmap_locked, cc); > > - } > > + result = collapse_single_pmd(addr, vma, &mmap_locked, cc); > > + > > Ack the fact you noted the behaviour change re: > collapse_test_exit_or_disable() that seems fine. > > > if (!mmap_locked) > > *lock_dropped = true; > > > > -handle_result: > > switch (result) { > > case SCAN_SUCCEED: > > case SCAN_PMD_MAPPED: > > ++thps; > > break; > > - case SCAN_PTE_MAPPED_HUGEPAGE: > > - BUG_ON(mmap_locked); > > - mmap_read_lock(mm); > > - result = collapse_pte_mapped_thp(mm, addr, true); > > - mmap_read_unlock(mm); > > - goto handle_result; > > One thing that differs with new code her is we filter SCAN_PMD_MAPPED to > SCAN_SUCCEED. > > I was about to say 'but ++thps - is this correct' but now I realise this > was looping back on itself with a goto to do just that... ugh ye gads. > > Anwyay that's fine because it doesn't change anything. > > Re: switch statement in general, again would be good to always have each > scan possibility in switch statements, but perhaps given so many not > practical :) Yeah it may be worth investigating for future changes I have for khugepaged (including the new switch statement I implement later and you commented on) > > (that way the compiler warns on missing a newly added enum val) > > > /* Whitelisted set of results where continuing OK */ > > + case SCAN_PTE_MAPPED_HUGEPAGE: > > case SCAN_PMD_NULL: > > case SCAN_PTE_NON_PRESENT: > > case SCAN_PTE_UFFD_WP: > > -- Thanks for the review :) -- Nico > > 2.50.1 > > >
On Wed, Aug 20, 2025 at 10:35:57AM -0600, Nico Pache wrote: > On Wed, Aug 20, 2025 at 5:22 AM Lorenzo Stoakes > <lorenzo.stoakes@oracle.com> wrote: > > > > On Tue, Aug 19, 2025 at 07:41:54AM -0600, Nico Pache wrote: > > > The khugepaged daemon and madvise_collapse have two different > > > implementations that do almost the same thing. > > > > > > Create collapse_single_pmd to increase code reuse and create an entry > > > point to these two users. > > > > > > Refactor madvise_collapse and collapse_scan_mm_slot to use the new > > > collapse_single_pmd function. This introduces a minor behavioral change > > > that is most likely an undiscovered bug. The current implementation of > > > khugepaged tests collapse_test_exit_or_disable before calling > > > collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse > > > case. By unifying these two callers madvise_collapse now also performs > > > this check. > > > > > > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > > > Acked-by: David Hildenbrand <david@redhat.com> > > > Signed-off-by: Nico Pache <npache@redhat.com> > > > --- > > > mm/khugepaged.c | 94 ++++++++++++++++++++++++++----------------------- > > > 1 file changed, 49 insertions(+), 45 deletions(-) > > > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > > index 0e7bbadf03ee..b7b98aebb670 100644 > > > --- a/mm/khugepaged.c > > > +++ b/mm/khugepaged.c > > > @@ -2382,6 +2382,50 @@ static int collapse_scan_file(struct mm_struct *mm, unsigned long addr, > > > return result; > > > } > > > > > > +/* > > > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > > > + * the results. > > > + */ > > > +static int collapse_single_pmd(unsigned long addr, > > > + struct vm_area_struct *vma, bool *mmap_locked, > > > + struct collapse_control *cc) > > > +{ > > > + int result = SCAN_FAIL; > > > > You assign result in all branches, so this can be uninitialised. > ack, thanks. > > > > > + struct mm_struct *mm = vma->vm_mm; > > > + > > > + if (!vma_is_anonymous(vma)) { > > > + struct file *file = get_file(vma->vm_file); > > > + pgoff_t pgoff = linear_page_index(vma, addr); > > > + > > > + mmap_read_unlock(mm); > > > + *mmap_locked = false; > > > + result = collapse_scan_file(mm, addr, file, pgoff, cc); > > > + fput(file); > > > + if (result == SCAN_PTE_MAPPED_HUGEPAGE) { > > > + mmap_read_lock(mm); > > > + *mmap_locked = true; > > > + if (collapse_test_exit_or_disable(mm)) { > > > + mmap_read_unlock(mm); > > > + *mmap_locked = false; > > > + result = SCAN_ANY_PROCESS; > > > + goto end; > > > > Don't love that in e.g. collapse_scan_mm_slot() we are using the mmap lock being > > disabled as in effect an error code. > > > > Is SCAN_ANY_PROCESS correct here? Because in collapse_scan_mm_slot() you'd > > previously: > https://lore.kernel.org/lkml/a881ed65-351a-469f-b625-a3066d0f1d5c@linux.alibaba.com/ > Baolin brought up a good point a while back that if > collapse_test_exit_or_disable returns true we will be breaking out of > the loop and should change the return value to indicate this. So to > combine the madvise breakout and the scan_slot breakout we drop the > lock and return SCAN_ANY_PROCESS. Let's document in commit msg, as Liam's pointed out it's really important to track things, and part of that as well is detailing in the commit message what you're doing + why. With the THP code being as 'organically grown' as it is shall we say :) it's even more mportant to be specific. > > > > if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > mmap_read_lock(mm); > > if (collapse_test_exit_or_disable(mm)) > > goto breakouterloop; > > ... > > } > > > > But now you're setting result = SCAN_ANY_PROCESS rather than > > SCAN_PTE_MAPPED_HUGEPAGE in this instance? > > > > You don't mention that you're changing this, or at least explicitly enough, > > the commit message should state that you're changing this and explain why > > it's ok. > I do state it but perhaps I need to be more verbose! I will update the > message to state we are also changing the result value too. Thanks! > > > > This whole file is horrid, and it's kinda an aside, but I really wish we > > had some comment going through each of the scan_result cases and explaining > > what each one meant. > Yeah its been a huge pain to have to investigate what everything is > supposed to mean, and I often have to go searching to confirm things. > include/trace/events/huge_memory.h has a "good" summary of them > > > > Also I think: > > > > return SCAN_ANY_PROCESS; > > > > Is better than: > > > > result = SCAN_ANY_PROCESS; > > goto end; > I agree! I will change that :) > > ... > > end: > > return result; > > > > > + } > > > + result = collapse_pte_mapped_thp(mm, addr, > > > + !cc->is_khugepaged); > > > > Hm another change here, in the original code in collapse_scan_mm_slot() > > this is: > > > > *result = collapse_pte_mapped_thp(mm, > > khugepaged_scan.address, false); > > > > Presumably collapse_scan_mm_slot() is only ever invoked with > > cc->is_khugepaged? > Correct, but the madvise_collapse calls this with true, hence why it > now depends on the is_khugepaged variable. No functional change here. > > > > Maybe worth adding a VM_WARN_ON_ONCE(!cc->is_khugepaged) at the top of > > collapse_scan_mm_slot() to assert this (and other places where your change > > assumes this to be the case). > Ok I will investigate doing that but it would take a huge mistake to > hit that assertion. > > > > > > > + if (result == SCAN_PMD_MAPPED) > > > + result = SCAN_SUCCEED; > > > + mmap_read_unlock(mm); > > > + *mmap_locked = false; > > > + } > > > + } else { > > > + result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > > > + } > > > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > > > + ++khugepaged_pages_collapsed; > > > > Similarly, presumably because collapse_scan_mm_slot() only ever invoked > > khugepaged case this didn't have the cc->is_khugepaged check? > Correct, we only increment this when its khugepaged, so we need to > guard it so madvise collapse wont increment this. You know what I'm going to say :) commit message please! > > > > > +end: > > > + return result; > > > +} > > > > There's a LOT of nesting going on here, I think we can simplify this a > > lot. If we make the change I noted above re: returning SCAN_ANY_PROCESS< we > > can move the end label up a bit and avoid a ton of nesting, e.g.: > Ah I like this much more, I will try to implement/test it. > > > > static int collapse_single_pmd(unsigned long addr, > > struct vm_area_struct *vma, bool *mmap_locked, > > struct collapse_control *cc) > > { > > struct mm_struct *mm = vma->vm_mm; > > struct file *file; > > pgoff_t pgoff; > > int result; > > > > if (vma_is_anonymous(vma)) { > > result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > > goto end: > > } > > > > file = get_file(vma->vm_file); > > pgoff = linear_page_index(vma, addr); > > > > mmap_read_unlock(mm); > > *mmap_locked = false; > > result = collapse_scan_file(mm, addr, file, pgoff, cc); > > fput(file); > > if (result != SCAN_PTE_MAPPED_HUGEPAGE) > > goto end; > > > > mmap_read_lock(mm); > > *mmap_locked = true; > > if (collapse_test_exit_or_disable(mm)) { > > mmap_read_unlock(mm); > > *mmap_locked = false; > > return SCAN_ANY_PROCESS; > > } > > result = collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged); > > if (result == SCAN_PMD_MAPPED) > > result = SCAN_SUCCEED; > > mmap_read_unlock(mm); > > *mmap_locked = false; > > > > end: > > if (cc->is_khugepaged && result == SCAN_SUCCEED) > > ++khugepaged_pages_collapsed; > > > > return result; > > } > > > > (untested, thrown together so do double check!) This suggested refactoring work for you? > > > > > + > > > static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > > struct collapse_control *cc) > > > __releases(&khugepaged_mm_lock) > > > @@ -2455,34 +2499,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > > VM_BUG_ON(khugepaged_scan.address < hstart || > > > khugepaged_scan.address + HPAGE_PMD_SIZE > > > > hend); > > > - if (!vma_is_anonymous(vma)) { > > > - struct file *file = get_file(vma->vm_file); > > > - pgoff_t pgoff = linear_page_index(vma, > > > - khugepaged_scan.address); > > > - > > > - mmap_read_unlock(mm); > > > - mmap_locked = false; > > > - *result = collapse_scan_file(mm, > > > - khugepaged_scan.address, file, pgoff, cc); > > > - fput(file); > > > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > > - mmap_read_lock(mm); > > > - if (collapse_test_exit_or_disable(mm)) > > > - goto breakouterloop; > > > - *result = collapse_pte_mapped_thp(mm, > > > - khugepaged_scan.address, false); > > > - if (*result == SCAN_PMD_MAPPED) > > > - *result = SCAN_SUCCEED; > > > - mmap_read_unlock(mm); > > > - } > > > - } else { > > > - *result = collapse_scan_pmd(mm, vma, > > > - khugepaged_scan.address, &mmap_locked, cc); > > > - } > > > - > > > - if (*result == SCAN_SUCCEED) > > > - ++khugepaged_pages_collapsed; > > > > > > + *result = collapse_single_pmd(khugepaged_scan.address, > > > + vma, &mmap_locked, cc); > > > /* move to next address */ > > > khugepaged_scan.address += HPAGE_PMD_SIZE; > > > progress += HPAGE_PMD_NR; > > > @@ -2799,34 +2818,19 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start, > > > mmap_assert_locked(mm); > > > memset(cc->node_load, 0, sizeof(cc->node_load)); > > > nodes_clear(cc->alloc_nmask); > > > - if (!vma_is_anonymous(vma)) { > > > - struct file *file = get_file(vma->vm_file); > > > - pgoff_t pgoff = linear_page_index(vma, addr); > > > > > > - mmap_read_unlock(mm); > > > - mmap_locked = false; > > > - result = collapse_scan_file(mm, addr, file, pgoff, cc); > > > - fput(file); > > > - } else { > > > - result = collapse_scan_pmd(mm, vma, addr, > > > - &mmap_locked, cc); > > > - } > > > + result = collapse_single_pmd(addr, vma, &mmap_locked, cc); > > > + > > > > Ack the fact you noted the behaviour change re: > > collapse_test_exit_or_disable() that seems fine. > > > > > if (!mmap_locked) > > > *lock_dropped = true; > > > > > > -handle_result: > > > switch (result) { > > > case SCAN_SUCCEED: > > > case SCAN_PMD_MAPPED: > > > ++thps; > > > break; > > > - case SCAN_PTE_MAPPED_HUGEPAGE: > > > - BUG_ON(mmap_locked); > > > - mmap_read_lock(mm); > > > - result = collapse_pte_mapped_thp(mm, addr, true); > > > - mmap_read_unlock(mm); > > > - goto handle_result; > > > > One thing that differs with new code her is we filter SCAN_PMD_MAPPED to > > SCAN_SUCCEED. > > > > I was about to say 'but ++thps - is this correct' but now I realise this > > was looping back on itself with a goto to do just that... ugh ye gads. > > > > Anwyay that's fine because it doesn't change anything. > > > > Re: switch statement in general, again would be good to always have each > > scan possibility in switch statements, but perhaps given so many not > > practical :) > > Yeah it may be worth investigating for future changes I have for > khugepaged (including the new switch statement I implement later and > you commented on) Ack yeah this can be one for the future! > > > > (that way the compiler warns on missing a newly added enum val) > > > > > /* Whitelisted set of results where continuing OK */ > > > + case SCAN_PTE_MAPPED_HUGEPAGE: > > > case SCAN_PMD_NULL: > > > case SCAN_PTE_NON_PRESENT: > > > case SCAN_PTE_UFFD_WP: > > > -- > > Thanks for the review :) No probs, to underline as well - the critique is to make sure we get this right, my aim here is to get your series landed in as good a form as possible :) > > -- Nico > > > 2.50.1 > > > > > > Cheers, Lorenzo
On Fri, Aug 22, 2025 at 4:23 AM Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > On Wed, Aug 20, 2025 at 10:35:57AM -0600, Nico Pache wrote: > > On Wed, Aug 20, 2025 at 5:22 AM Lorenzo Stoakes > > <lorenzo.stoakes@oracle.com> wrote: > > > > > > On Tue, Aug 19, 2025 at 07:41:54AM -0600, Nico Pache wrote: > > > > The khugepaged daemon and madvise_collapse have two different > > > > implementations that do almost the same thing. > > > > > > > > Create collapse_single_pmd to increase code reuse and create an entry > > > > point to these two users. > > > > > > > > Refactor madvise_collapse and collapse_scan_mm_slot to use the new > > > > collapse_single_pmd function. This introduces a minor behavioral change > > > > that is most likely an undiscovered bug. The current implementation of > > > > khugepaged tests collapse_test_exit_or_disable before calling > > > > collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse > > > > case. By unifying these two callers madvise_collapse now also performs > > > > this check. > > > > > > > > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > > > > Acked-by: David Hildenbrand <david@redhat.com> > > > > Signed-off-by: Nico Pache <npache@redhat.com> > > > > --- > > > > mm/khugepaged.c | 94 ++++++++++++++++++++++++++----------------------- > > > > 1 file changed, 49 insertions(+), 45 deletions(-) > > > > > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > > > index 0e7bbadf03ee..b7b98aebb670 100644 > > > > --- a/mm/khugepaged.c > > > > +++ b/mm/khugepaged.c > > > > @@ -2382,6 +2382,50 @@ static int collapse_scan_file(struct mm_struct *mm, unsigned long addr, > > > > return result; > > > > } > > > > > > > > +/* > > > > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > > > > + * the results. > > > > + */ > > > > +static int collapse_single_pmd(unsigned long addr, > > > > + struct vm_area_struct *vma, bool *mmap_locked, > > > > + struct collapse_control *cc) > > > > +{ > > > > + int result = SCAN_FAIL; > > > > > > You assign result in all branches, so this can be uninitialised. > > ack, thanks. > > > > > > > + struct mm_struct *mm = vma->vm_mm; > > > > + > > > > + if (!vma_is_anonymous(vma)) { > > > > + struct file *file = get_file(vma->vm_file); > > > > + pgoff_t pgoff = linear_page_index(vma, addr); > > > > + > > > > + mmap_read_unlock(mm); > > > > + *mmap_locked = false; > > > > + result = collapse_scan_file(mm, addr, file, pgoff, cc); > > > > + fput(file); > > > > + if (result == SCAN_PTE_MAPPED_HUGEPAGE) { > > > > + mmap_read_lock(mm); > > > > + *mmap_locked = true; > > > > + if (collapse_test_exit_or_disable(mm)) { > > > > + mmap_read_unlock(mm); > > > > + *mmap_locked = false; > > > > + result = SCAN_ANY_PROCESS; > > > > + goto end; > > > > > > Don't love that in e.g. collapse_scan_mm_slot() we are using the mmap lock being > > > disabled as in effect an error code. > > > > > > Is SCAN_ANY_PROCESS correct here? Because in collapse_scan_mm_slot() you'd > > > previously: > > https://lore.kernel.org/lkml/a881ed65-351a-469f-b625-a3066d0f1d5c@linux.alibaba.com/ > > Baolin brought up a good point a while back that if > > collapse_test_exit_or_disable returns true we will be breaking out of > > the loop and should change the return value to indicate this. So to > > combine the madvise breakout and the scan_slot breakout we drop the > > lock and return SCAN_ANY_PROCESS. > > Let's document in commit msg, as Liam's pointed out it's really important to > track things, and part of that as well is detailing in the commit message what > you're doing + why. ack! thanks > > With the THP code being as 'organically grown' as it is shall we say :) it's > even more mportant to be specific. > > > > > > > if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > > mmap_read_lock(mm); > > > if (collapse_test_exit_or_disable(mm)) > > > goto breakouterloop; > > > ... > > > } > > > > > > But now you're setting result = SCAN_ANY_PROCESS rather than > > > SCAN_PTE_MAPPED_HUGEPAGE in this instance? > > > > > > You don't mention that you're changing this, or at least explicitly enough, > > > the commit message should state that you're changing this and explain why > > > it's ok. > > I do state it but perhaps I need to be more verbose! I will update the > > message to state we are also changing the result value too. > > Thanks! > > > > > > > This whole file is horrid, and it's kinda an aside, but I really wish we > > > had some comment going through each of the scan_result cases and explaining > > > what each one meant. > > Yeah its been a huge pain to have to investigate what everything is > > supposed to mean, and I often have to go searching to confirm things. > > include/trace/events/huge_memory.h has a "good" summary of them > > > > > > Also I think: > > > > > > return SCAN_ANY_PROCESS; > > > > > > Is better than: > > > > > > result = SCAN_ANY_PROCESS; > > > goto end; > > I agree! I will change that :) > > > ... > > > end: > > > return result; > > > > > > > + } > > > > + result = collapse_pte_mapped_thp(mm, addr, > > > > + !cc->is_khugepaged); > > > > > > Hm another change here, in the original code in collapse_scan_mm_slot() > > > this is: > > > > > > *result = collapse_pte_mapped_thp(mm, > > > khugepaged_scan.address, false); > > > > > > Presumably collapse_scan_mm_slot() is only ever invoked with > > > cc->is_khugepaged? > > Correct, but the madvise_collapse calls this with true, hence why it > > now depends on the is_khugepaged variable. No functional change here. > > > > > > Maybe worth adding a VM_WARN_ON_ONCE(!cc->is_khugepaged) at the top of > > > collapse_scan_mm_slot() to assert this (and other places where your change > > > assumes this to be the case). > > Ok I will investigate doing that but it would take a huge mistake to > > hit that assertion. > > > > > > > > > > + if (result == SCAN_PMD_MAPPED) > > > > + result = SCAN_SUCCEED; > > > > + mmap_read_unlock(mm); > > > > + *mmap_locked = false; > > > > + } > > > > + } else { > > > > + result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > > > > + } > > > > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > > > > + ++khugepaged_pages_collapsed; > > > > > > Similarly, presumably because collapse_scan_mm_slot() only ever invoked > > > khugepaged case this didn't have the cc->is_khugepaged check? > > Correct, we only increment this when its khugepaged, so we need to > > guard it so madvise collapse wont increment this. > > You know what I'm going to say :) commit message please! ack, although this isnt anything new. I just needed to add it because madvise collapse doesnt increment this. Still I'll add a blurb. > > > > > > > > +end: > > > > + return result; > > > > +} > > > > > > There's a LOT of nesting going on here, I think we can simplify this a > > > lot. If we make the change I noted above re: returning SCAN_ANY_PROCESS< we > > > can move the end label up a bit and avoid a ton of nesting, e.g.: > > Ah I like this much more, I will try to implement/test it. > > > > > > static int collapse_single_pmd(unsigned long addr, > > > struct vm_area_struct *vma, bool *mmap_locked, > > > struct collapse_control *cc) > > > { > > > struct mm_struct *mm = vma->vm_mm; > > > struct file *file; > > > pgoff_t pgoff; > > > int result; > > > > > > if (vma_is_anonymous(vma)) { > > > result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > > > goto end: > > > } > > > > > > file = get_file(vma->vm_file); > > > pgoff = linear_page_index(vma, addr); > > > > > > mmap_read_unlock(mm); > > > *mmap_locked = false; > > > result = collapse_scan_file(mm, addr, file, pgoff, cc); > > > fput(file); > > > if (result != SCAN_PTE_MAPPED_HUGEPAGE) > > > goto end; > > > > > > mmap_read_lock(mm); > > > *mmap_locked = true; > > > if (collapse_test_exit_or_disable(mm)) { > > > mmap_read_unlock(mm); > > > *mmap_locked = false; > > > return SCAN_ANY_PROCESS; > > > } > > > result = collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged); > > > if (result == SCAN_PMD_MAPPED) > > > result = SCAN_SUCCEED; > > > mmap_read_unlock(mm); > > > *mmap_locked = false; > > > > > > end: > > > if (cc->is_khugepaged && result == SCAN_SUCCEED) > > > ++khugepaged_pages_collapsed; > > > > > > return result; > > > } > > > > > > (untested, thrown together so do double check!) > > This suggested refactoring work for you? Looks correct, I'm going to implement all the changes then test to make sure it works as intended. > > > > > > > > + > > > > static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > > > struct collapse_control *cc) > > > > __releases(&khugepaged_mm_lock) > > > > @@ -2455,34 +2499,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result, > > > > VM_BUG_ON(khugepaged_scan.address < hstart || > > > > khugepaged_scan.address + HPAGE_PMD_SIZE > > > > > hend); > > > > - if (!vma_is_anonymous(vma)) { > > > > - struct file *file = get_file(vma->vm_file); > > > > - pgoff_t pgoff = linear_page_index(vma, > > > > - khugepaged_scan.address); > > > > - > > > > - mmap_read_unlock(mm); > > > > - mmap_locked = false; > > > > - *result = collapse_scan_file(mm, > > > > - khugepaged_scan.address, file, pgoff, cc); > > > > - fput(file); > > > > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > > > > - mmap_read_lock(mm); > > > > - if (collapse_test_exit_or_disable(mm)) > > > > - goto breakouterloop; > > > > - *result = collapse_pte_mapped_thp(mm, > > > > - khugepaged_scan.address, false); > > > > - if (*result == SCAN_PMD_MAPPED) > > > > - *result = SCAN_SUCCEED; > > > > - mmap_read_unlock(mm); > > > > - } > > > > - } else { > > > > - *result = collapse_scan_pmd(mm, vma, > > > > - khugepaged_scan.address, &mmap_locked, cc); > > > > - } > > > > - > > > > - if (*result == SCAN_SUCCEED) > > > > - ++khugepaged_pages_collapsed; > > > > > > > > + *result = collapse_single_pmd(khugepaged_scan.address, > > > > + vma, &mmap_locked, cc); > > > > /* move to next address */ > > > > khugepaged_scan.address += HPAGE_PMD_SIZE; > > > > progress += HPAGE_PMD_NR; > > > > @@ -2799,34 +2818,19 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start, > > > > mmap_assert_locked(mm); > > > > memset(cc->node_load, 0, sizeof(cc->node_load)); > > > > nodes_clear(cc->alloc_nmask); > > > > - if (!vma_is_anonymous(vma)) { > > > > - struct file *file = get_file(vma->vm_file); > > > > - pgoff_t pgoff = linear_page_index(vma, addr); > > > > > > > > - mmap_read_unlock(mm); > > > > - mmap_locked = false; > > > > - result = collapse_scan_file(mm, addr, file, pgoff, cc); > > > > - fput(file); > > > > - } else { > > > > - result = collapse_scan_pmd(mm, vma, addr, > > > > - &mmap_locked, cc); > > > > - } > > > > + result = collapse_single_pmd(addr, vma, &mmap_locked, cc); > > > > + > > > > > > Ack the fact you noted the behaviour change re: > > > collapse_test_exit_or_disable() that seems fine. > > > > > > > if (!mmap_locked) > > > > *lock_dropped = true; > > > > > > > > -handle_result: > > > > switch (result) { > > > > case SCAN_SUCCEED: > > > > case SCAN_PMD_MAPPED: > > > > ++thps; > > > > break; > > > > - case SCAN_PTE_MAPPED_HUGEPAGE: > > > > - BUG_ON(mmap_locked); > > > > - mmap_read_lock(mm); > > > > - result = collapse_pte_mapped_thp(mm, addr, true); > > > > - mmap_read_unlock(mm); > > > > - goto handle_result; > > > > > > One thing that differs with new code her is we filter SCAN_PMD_MAPPED to > > > SCAN_SUCCEED. > > > > > > I was about to say 'but ++thps - is this correct' but now I realise this > > > was looping back on itself with a goto to do just that... ugh ye gads. > > > > > > Anwyay that's fine because it doesn't change anything. > > > > > > Re: switch statement in general, again would be good to always have each > > > scan possibility in switch statements, but perhaps given so many not > > > practical :) > > > > Yeah it may be worth investigating for future changes I have for > > khugepaged (including the new switch statement I implement later and > > you commented on) > > Ack yeah this can be one for the future! > > > > > > > (that way the compiler warns on missing a newly added enum val) > > > > > > > /* Whitelisted set of results where continuing OK */ > > > > + case SCAN_PTE_MAPPED_HUGEPAGE: > > > > case SCAN_PMD_NULL: > > > > case SCAN_PTE_NON_PRESENT: > > > > case SCAN_PTE_UFFD_WP: > > > > -- > > > > Thanks for the review :) > > No probs, to underline as well - the critique is to make sure we get this right, > my aim here is to get your series landed in as good a form as possible :) All critiquing is welcome and appreciated :) The refactoring looks much better now too! Cheers, -- Nico > > > > > -- Nico > > > > 2.50.1 > > > > > > > > > > > Cheers, Lorenzo >
© 2016 - 2025 Red Hat, Inc.