READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/khugepaged.c | 159 +++++++++++-------------------------------------
1 file changed, 34 insertions(+), 125 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b2ac28ddd480..39f0b8959535 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
int nr_none = 0;
bool is_shmem = shmem_file(file);
- VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
+ VM_WARN_ON_ONCE(!is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
result = alloc_charge_folio(&new_folio, mm, cc);
@@ -1909,8 +1909,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
mapping_set_update(&xas, mapping);
__folio_set_locked(new_folio);
- if (is_shmem)
- __folio_set_swapbacked(new_folio);
+ __folio_set_swapbacked(new_folio);
new_folio->index = start;
new_folio->mapping = mapping;
@@ -1935,83 +1934,39 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
folio = xas_load(&xas);
VM_BUG_ON(index != xas.xa_index);
- if (is_shmem) {
- if (!folio) {
- /*
- * Stop if extent has been truncated or
- * hole-punched, and is now completely
- * empty.
- */
- if (index == start) {
- if (!xas_next_entry(&xas, end - 1)) {
- result = SCAN_TRUNCATED;
- goto xa_locked;
- }
+ if (!folio) {
+ /*
+ * Stop if extent has been truncated or
+ * hole-punched, and is now completely
+ * empty.
+ */
+ if (index == start) {
+ if (!xas_next_entry(&xas, end - 1)) {
+ result = SCAN_TRUNCATED;
+ goto xa_locked;
}
- nr_none++;
- index++;
- continue;
}
+ nr_none++;
+ index++;
+ continue;
+ }
- if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
- xas_unlock_irq(&xas);
- /* swap in or instantiate fallocated page */
- if (shmem_get_folio(mapping->host, index, 0,
- &folio, SGP_NOALLOC)) {
- result = SCAN_FAIL;
- goto xa_unlocked;
- }
- /* drain lru cache to help folio_isolate_lru() */
- lru_add_drain();
- } else if (folio_trylock(folio)) {
- folio_get(folio);
- xas_unlock_irq(&xas);
- } else {
- result = SCAN_PAGE_LOCK;
- goto xa_locked;
- }
- } else { /* !is_shmem */
- if (!folio || xa_is_value(folio)) {
- xas_unlock_irq(&xas);
- page_cache_sync_readahead(mapping, &file->f_ra,
- file, index,
- end - index);
- /* drain lru cache to help folio_isolate_lru() */
- lru_add_drain();
- folio = filemap_lock_folio(mapping, index);
- if (IS_ERR(folio)) {
- result = SCAN_FAIL;
- goto xa_unlocked;
- }
- } else if (folio_test_dirty(folio)) {
- /*
- * khugepaged only works on read-only fd,
- * so this page is dirty because it hasn't
- * been flushed since first write. There
- * won't be new dirty pages.
- *
- * Trigger async flush here and hope the
- * writeback is done when khugepaged
- * revisits this page.
- *
- * This is a one-off situation. We are not
- * forcing writeback in loop.
- */
- xas_unlock_irq(&xas);
- filemap_flush(mapping);
- result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
- goto xa_unlocked;
- } else if (folio_test_writeback(folio)) {
- xas_unlock_irq(&xas);
- result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
+ if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
+ xas_unlock_irq(&xas);
+ /* swap in or instantiate fallocated page */
+ if (shmem_get_folio(mapping->host, index, 0,
+ &folio, SGP_NOALLOC)) {
+ result = SCAN_FAIL;
goto xa_unlocked;
- } else if (folio_trylock(folio)) {
- folio_get(folio);
- xas_unlock_irq(&xas);
- } else {
- result = SCAN_PAGE_LOCK;
- goto xa_locked;
}
+ /* drain lru cache to help folio_isolate_lru() */
+ lru_add_drain();
+ } else if (folio_trylock(folio)) {
+ folio_get(folio);
+ xas_unlock_irq(&xas);
+ } else {
+ result = SCAN_PAGE_LOCK;
+ goto xa_locked;
}
/*
@@ -2041,17 +1996,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
goto out_unlock;
}
- if (!is_shmem && (folio_test_dirty(folio) ||
- folio_test_writeback(folio))) {
- /*
- * khugepaged only works on read-only fd, so this
- * folio is dirty because it hasn't been flushed
- * since first write.
- */
- result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
- goto out_unlock;
- }
-
if (!folio_isolate_lru(folio)) {
result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
@@ -2101,21 +2045,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
goto xa_unlocked;
}
- if (!is_shmem) {
- filemap_nr_thps_inc(mapping);
- /*
- * Paired with the fence in do_dentry_open() -> get_write_access()
- * to ensure i_writecount is up to date and the update to nr_thps
- * is visible. Ensures the page cache will be truncated if the
- * file is opened writable.
- */
- smp_mb();
- if (inode_is_open_for_write(mapping->host)) {
- result = SCAN_FAIL;
- filemap_nr_thps_dec(mapping);
- }
- }
-
xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:
@@ -2224,12 +2153,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
xas_lock_irq(&xas);
}
- if (is_shmem) {
- lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR);
- lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
- } else {
- lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR);
- }
+ lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR);
+ lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, HPAGE_PMD_NR);
/*
@@ -2240,8 +2165,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
folio_mark_uptodate(new_folio);
folio_ref_add(new_folio, HPAGE_PMD_NR - 1);
- if (is_shmem)
- folio_mark_dirty(new_folio);
+ folio_mark_dirty(new_folio);
folio_add_lru(new_folio);
/* Join all the small entries into a single multi-index entry. */
@@ -2266,9 +2190,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
list_del(&folio->lru);
lruvec_stat_mod_folio(folio, NR_FILE_PAGES,
-folio_nr_pages(folio));
- if (is_shmem)
- lruvec_stat_mod_folio(folio, NR_SHMEM,
- -folio_nr_pages(folio));
+ lruvec_stat_mod_folio(folio, NR_SHMEM, -folio_nr_pages(folio));
folio->mapping = NULL;
folio_clear_active(folio);
folio_clear_unevictable(folio);
@@ -2293,19 +2215,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
folio_putback_lru(folio);
folio_put(folio);
}
- /*
- * Undo the updates of filemap_nr_thps_inc for non-SHMEM
- * file only. This undo is not needed unless failure is
- * due to SCAN_COPY_MC.
- */
- if (!is_shmem && result == SCAN_COPY_MC) {
- filemap_nr_thps_dec(mapping);
- /*
- * Paired with the fence in do_dentry_open() -> get_write_access()
- * to ensure the update to nr_thps is visible.
- */
- smp_mb();
- }
new_folio->mapping = NULL;
--
2.43.0
On 3/23/26 20:06, Zi Yan wrote: > READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code. > > Signed-off-by: Zi Yan <ziy@nvidia.com> > --- > mm/khugepaged.c | 159 +++++++++++------------------------------------- > 1 file changed, 34 insertions(+), 125 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index b2ac28ddd480..39f0b8959535 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, > int nr_none = 0; > bool is_shmem = shmem_file(file); > > - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > + VM_WARN_ON_ONCE(!is_shmem); Oh, but if that means that khugepaged cannot collapse large folios in FSes anymore, then this is the wrong approach I suppose? I would have assumed that we would now collapse for any files that support large folios (in PMD size), not stopping to collapse entirely. -- Cheers, David
On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote: > On 3/23/26 20:06, Zi Yan wrote: >> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code. >> >> Signed-off-by: Zi Yan <ziy@nvidia.com> >> --- >> mm/khugepaged.c | 159 +++++++++++------------------------------------- >> 1 file changed, 34 insertions(+), 125 deletions(-) >> >> diff --git a/mm/khugepaged.c b/mm/khugepaged.c >> index b2ac28ddd480..39f0b8959535 100644 >> --- a/mm/khugepaged.c >> +++ b/mm/khugepaged.c >> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, >> int nr_none = 0; >> bool is_shmem = shmem_file(file); >> >> - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); >> + VM_WARN_ON_ONCE(!is_shmem); > > Oh, but if that means that khugepaged cannot collapse large folios in > FSes anymore, then this is the wrong approach I suppose? > > I would have assumed that we would now collapse for any files that > support large folios (in PMD size), not stopping to collapse entirely. My understanding is that collapse_file() is only used for READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it, I can replace IS_ENABLED with mapping_large_folio_support(). Best Regards, Yan, Zi
On 3/23/26 20:47, Zi Yan wrote: > On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote: > >> On 3/23/26 20:06, Zi Yan wrote: >>> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code. >>> >>> Signed-off-by: Zi Yan <ziy@nvidia.com> >>> --- >>> mm/khugepaged.c | 159 +++++++++++------------------------------------- >>> 1 file changed, 34 insertions(+), 125 deletions(-) >>> >>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c >>> index b2ac28ddd480..39f0b8959535 100644 >>> --- a/mm/khugepaged.c >>> +++ b/mm/khugepaged.c >>> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, >>> int nr_none = 0; >>> bool is_shmem = shmem_file(file); >>> >>> - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); >>> + VM_WARN_ON_ONCE(!is_shmem); >> >> Oh, but if that means that khugepaged cannot collapse large folios in >> FSes anymore, then this is the wrong approach I suppose? >> >> I would have assumed that we would now collapse for any files that >> support large folios (in PMD size), not stopping to collapse entirely. > > My understanding is that collapse_file() is only used for > READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it, > I can replace IS_ENABLE with mapping_large_folio_support(). Otherwise we'd be losing support for THP collapse in files? We'd have to cross fingers that readahead gives us some. So we have to be a bit careful here. We want khugepaged to collapse THPs in filesystems that support large folios even without READ_ONLY_THP_FOR_FS. -- Cheers, David
On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote: > On 3/23/26 20:47, Zi Yan wrote: >> On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote: >> >>> On 3/23/26 20:06, Zi Yan wrote: >>>> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code. >>>> >>>> Signed-off-by: Zi Yan <ziy@nvidia.com> >>>> --- >>>> mm/khugepaged.c | 159 +++++++++++------------------------------------- >>>> 1 file changed, 34 insertions(+), 125 deletions(-) >>>> >>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c >>>> index b2ac28ddd480..39f0b8959535 100644 >>>> --- a/mm/khugepaged.c >>>> +++ b/mm/khugepaged.c >>>> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, >>>> int nr_none = 0; >>>> bool is_shmem = shmem_file(file); >>>> >>>> - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); >>>> + VM_WARN_ON_ONCE(!is_shmem); >>> >>> Oh, but if that means that khugepaged cannot collapse large folios in >>> FSes anymore, then this is the wrong approach I suppose? >>> >>> I would have assumed that we would now collapse for any files that >>> support large folios (in PMD size), not stopping to collapse entirely. >> >> My understanding is that collapse_file() is only used for >> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it, >> I can replace IS_ENABLE with mapping_large_folio_support(). > > Otherwise we'd be losing support for THP collapse in files? We'd have to > cross fingers that readahead gives us some. > > So we have to be a bit careful here. We want khugepaged to collapse THPs > in filesystems that support large folios even without READ_ONLY_THP_FOR_FS. Based on [1], collapse_file() only works on read-only fd. So it is not as general as all files. If we do what you suggested, the function will collapse read-only folios from FSes with large folio support. BTW, has anyone used/relied on collapse_file() on any FS with large folio support? 
[1] https://elixir.bootlin.com/linux/v6.19.9/source/mm/khugepaged.c#L1956 Best Regards, Yan, Zi
On 3/23/26 20:59, Zi Yan wrote: > On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote: > >> On 3/23/26 20:47, Zi Yan wrote: >>> >>> >>> My understanding is that collapse_file() is only used for >>> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it, >>> I can replace IS_ENABLE with mapping_large_folio_support(). >> >> Otherwise we'd be losing support for THP collapse in files? We'd have to >> cross fingers that readahead gives us some. >> >> So we have to be a bit careful here. We want khugepaged to collapse THPs >> in filesystems that support large folios even without READ_ONLY_THP_FOR_FS. > > Based on[1], collapse_file() only works on read-only fd. So it is not > as general as all files. If we do what you suggested, the function > will be collapse read-only folios from FSes with large folio support. Right, I think that's the first step to keep the existing use case working. Long term, I suspect we might want to extend that to writable files etc. > > BTW, has anyone used/relied on collapse_file() on any FS with large folio > support? I assume so, because for executables you would have to be lucky to get a PMD THP? So I don't see the non-khugepaged large folio support on par with khugepaged support. -- Cheers, David
On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote: > On 3/23/26 20:59, Zi Yan wrote: >> On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote: >> >>> On 3/23/26 20:47, Zi Yan wrote: >>>> >>>> >>>> My understanding is that collapse_file() is only used for >>>> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it, >>>> I can replace IS_ENABLE with mapping_large_folio_support(). >>> >>> Otherwise we'd be losing support for THP collapse in files? We'd have to >>> cross fingers that readahead gives us some. >>> >>> So we have to be a bit careful here. We want khugepaged to collapse THPs >>> in filesystems that support large folios even without READ_ONLY_THP_FOR_FS. >> >> Based on[1], collapse_file() only works on read-only fd. So it is not >> as general as all files. If we do what you suggested, the function >> will be collapse read-only folios from FSes with large folio support. > > Right, I think that's the first step to keep the existing use case > working. Long term, I suspect we might want to extend that to writable > files etc. > >> >> BTW, has anyone used/relied on collapse_file() on any FS with large folio >> support? > > I assume so, because for executables you would have to be lucky to get a > PMD THP? So I don't see the non-khugepaged large folio support on par > with khugepaged support. It is more like turning on READ_ONLY_THP_FOR_FS by default for FS with large folio support instead of removing it. OK, I will give it another try. Best Regards, Yan, Zi
On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote: > On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote: > > I assume so, because for executables you would have to be lucky to get a > > PMD THP? So I don't see the non-khugepaged large folio support on par > > with khugepaged support. Not necessarily that lucky; if you set VM_HUGEPAGE, do_sync_mmap_readahead() will allocate PMD-sized folios automatically. On busy database servers (and is there any other kind?), khugepaged takes too long to run and find opportunities to collapse text pages. Like, days. > It is more like turning on READ_ONLY_THP_FOR_FS by default for > FS with large folio support instead of removing it. > > OK, I will give it another try. I think the test needs to be: if (mapping_max_folio_order(mapping) >= PMD_ORDER) as there can be cases of filesystems which support up to, say, 64KiB, but not all the way up to 2MiB. I disapprove of this situation, but this is where we are right now.
On 3/23/26 22:02, Matthew Wilcox wrote: > On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote: >> On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote: >>> I assume so, because for executables you would have to be lucky to get a >>> PMD THP? So I don't see the non-khugepaged large folio support on par >>> with khugepaged support. > > Not necessarily that lucky; if you set VM_HUGEPAGE, > do_sync_mmap_readahead() will allocate PMD-sized folios automatically. > On busy database servers (and is there any other kind?), khugepaged > takes too long to run and find opportunities to collapse text pages. > Like, days. Yes, in particular given that the default khugepaged settings are awful. > >> It is more like turning on READ_ONLY_THP_FOR_FS by default for >> FS with large folio support instead of removing it. >> >> OK, I will give it another try. > > I think the test needs to be: > > if (mapping_max_folio_order(mapping) >= PMD_ORDER) > > as there can be cases of filesystems which support up to, say, 64KiB, > but not all the way up to 2MiB. I disapprove of this situation, but > this is where we are right now. Right, that's what I had in mind. -- Cheers, David
On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote: > On 3/23/26 22:02, Matthew Wilcox wrote: >> On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote: >>> On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote: >>>> I assume so, because for executables you would have to be lucky to get a >>>> PMD THP? So I don't see the non-khugepaged large folio support on par >>>> with khugepaged support. >> >> Not necessarily that lucky; if you set VM_HUGEPAGE, >> do_sync_mmap_readahead() will allocate PMD-sized folios automatically. >> On busy database servers (and is there any other kind?), khugepaged >> takes too long to run and find opportunities to collapse text pages. >> Like, days. > > Yes, in particular given that the default khugepaged settings are awful. > >> >>> It is more like turning on READ_ONLY_THP_FOR_FS by default for >>> FS with large folio support instead of removing it. >>> >>> OK, I will give it another try. >> >> I think the test needs to be: >> >> if (mapping_max_folio_order(mapping) >= PMD_ORDER) This is very helpful, since I was thinking about using mapping_large_folio_support(). >> >> as there can be cases of filesystems which support up to, say, 64KiB, >> but not all the way up to 2MiB. I disapprove of this situation, but >> this is where we are right now. > > Right, that's what I had in mind. Does Nico’s mTHP support for khugepaged include changes to collapse_file()? That might change the above test. Best Regards, Yan, Zi
On 3/23/26 22:11, Zi Yan wrote: > On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote: > >> On 3/23/26 22:02, Matthew Wilcox wrote: >>> >>> Not necessarily that lucky; if you set VM_HUGEPAGE, >>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically. >>> On busy database servers (and is there any other kind?), khugepaged >>> takes too long to run and find opportunities to collapse text pages. >>> Like, days. >> >> Yes, in particular given that the default khugepaged settings are awful. >> >>> >>> >>> I think the test needs to be: >>> >>> if (mapping_max_folio_order(mapping) >= PMD_ORDER) > > This is very helpful, since I was thinking about using > mapping_large_folio_support(). > >>> >>> as there can be cases of filesystems which support up to, say, 64KiB, >>> but not all the way up to 2MiB. I disapprove of this situation, but >>> this is where we are right now. >> >> Right, that's what I had in mind. > > Does Nico’s mTHP support for khugepaged include changes to collapse_file()? > That might change the above test. At least not regarding adding support for other folio sizes. Baolin (IIRC) had a version for shmem support, but that will come after Nico's series was merged. -- Cheers, David
On 23 Mar 2026, at 17:21, David Hildenbrand (Arm) wrote: > On 3/23/26 22:11, Zi Yan wrote: >> On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote: >> >>> On 3/23/26 22:02, Matthew Wilcox wrote: >>>> >>>> Not necessarily that lucky; if you set VM_HUGEPAGE, >>>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically. >>>> On busy database servers (and is there any other kind?), khugepaged >>>> takes too long to run and find opportunities to collapse text pages. >>>> Like, days. >>> >>> Yes, in particular given that the default khugepaged settings are awful. >>> >>>> >>>> >>>> I think the test needs to be: >>>> >>>> if (mapping_max_folio_order(mapping) >= PMD_ORDER) >> >> This is very helpful, since I was thinking about using >> mapping_large_folio_support(). >> >>>> >>>> as there can be cases of filesystems which support up to, say, 64KiB, >>>> but not all the way up to 2MiB. I disapprove of this situation, but >>>> this is where we are right now. >>> >>> Right, that's what I had in mind. >> >> Does Nico’s mTHP support for khugepaged include changes to collapse_file()? >> That might change the above test. > > At least not regarding adding support for other folio sizes. Baolin > (IIRC) had a version for shmem support, but that will come after Nico's > series was merged. Great. That makes my life easier. Thanks. Best Regards, Yan, Zi
On 3/24/26 8:12 AM, Zi Yan wrote: > On 23 Mar 2026, at 17:21, David Hildenbrand (Arm) wrote: > >> On 3/23/26 22:11, Zi Yan wrote: >>> On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote: >>> >>>> On 3/23/26 22:02, Matthew Wilcox wrote: >>>>> >>>>> Not necessarily that lucky; if you set VM_HUGEPAGE, >>>>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically. >>>>> On busy database servers (and is there any other kind?), khugepaged >>>>> takes too long to run and find opportunities to collapse text pages. >>>>> Like, days. >>>> >>>> Yes, in particular given that the default khugepaged settings are awful. >>>> >>>>> >>>>> >>>>> I think the test needs to be: >>>>> >>>>> if (mapping_max_folio_order(mapping) >= PMD_ORDER) >>> >>> This is very helpful, since I was thinking about using >>> mapping_large_folio_support(). >>> >>>>> >>>>> as there can be cases of filesystems which support up to, say, 64KiB, >>>>> but not all the way up to 2MiB. I disapprove of this situation, but >>>>> this is where we are right now. >>>> >>>> Right, that's what I had in mind. >>> >>> Does Nico’s mTHP support for khugepaged include changes to collapse_file()? >>> That might change the above test. >> >> At least not regarding adding support for other folio sizes. Baolin >> (IIRC) had a version for shmem support, but that will come after Nico's >> series was merged. Right. Here is the link to the RFC version: https://lore.kernel.org/all/cover.1755677674.git.baolin.wang@linux.alibaba.com/ Moreover, I have further updated my local tree based on Nico's series. I will send out the new version once his series is merged.
© 2016 - 2026 Red Hat, Inc.