Remove READ_ONLY_THP_FOR_FS

[RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

READ_ONLY_THP_FOR_FS is no longer present, so remove the corresponding
code from collapse_file().

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/khugepaged.c | 159 +++++++++++-------------------------------------
 1 file changed, 34 insertions(+), 125 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b2ac28ddd480..39f0b8959535 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 	int nr_none = 0;
 	bool is_shmem = shmem_file(file);
 
-	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
+	VM_WARN_ON_ONCE(!is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	result = alloc_charge_folio(&new_folio, mm, cc);
@@ -1909,8 +1909,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 	mapping_set_update(&xas, mapping);
 
 	__folio_set_locked(new_folio);
-	if (is_shmem)
-		__folio_set_swapbacked(new_folio);
+	__folio_set_swapbacked(new_folio);
 	new_folio->index = start;
 	new_folio->mapping = mapping;
 
@@ -1935,83 +1934,39 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		folio = xas_load(&xas);
 
 		VM_BUG_ON(index != xas.xa_index);
-		if (is_shmem) {
-			if (!folio) {
-				/*
-				 * Stop if extent has been truncated or
-				 * hole-punched, and is now completely
-				 * empty.
-				 */
-				if (index == start) {
-					if (!xas_next_entry(&xas, end - 1)) {
-						result = SCAN_TRUNCATED;
-						goto xa_locked;
-					}
+		if (!folio) {
+			/*
+			 * Stop if extent has been truncated or
+			 * hole-punched, and is now completely
+			 * empty.
+			 */
+			if (index == start) {
+				if (!xas_next_entry(&xas, end - 1)) {
+					result = SCAN_TRUNCATED;
+					goto xa_locked;
 				}
-				nr_none++;
-				index++;
-				continue;
 			}
+			nr_none++;
+			index++;
+			continue;
+		}
 
-			if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
-				xas_unlock_irq(&xas);
-				/* swap in or instantiate fallocated page */
-				if (shmem_get_folio(mapping->host, index, 0,
-						&folio, SGP_NOALLOC)) {
-					result = SCAN_FAIL;
-					goto xa_unlocked;
-				}
-				/* drain lru cache to help folio_isolate_lru() */
-				lru_add_drain();
-			} else if (folio_trylock(folio)) {
-				folio_get(folio);
-				xas_unlock_irq(&xas);
-			} else {
-				result = SCAN_PAGE_LOCK;
-				goto xa_locked;
-			}
-		} else {	/* !is_shmem */
-			if (!folio || xa_is_value(folio)) {
-				xas_unlock_irq(&xas);
-				page_cache_sync_readahead(mapping, &file->f_ra,
-							  file, index,
-							  end - index);
-				/* drain lru cache to help folio_isolate_lru() */
-				lru_add_drain();
-				folio = filemap_lock_folio(mapping, index);
-				if (IS_ERR(folio)) {
-					result = SCAN_FAIL;
-					goto xa_unlocked;
-				}
-			} else if (folio_test_dirty(folio)) {
-				/*
-				 * khugepaged only works on read-only fd,
-				 * so this page is dirty because it hasn't
-				 * been flushed since first write. There
-				 * won't be new dirty pages.
-				 *
-				 * Trigger async flush here and hope the
-				 * writeback is done when khugepaged
-				 * revisits this page.
-				 *
-				 * This is a one-off situation. We are not
-				 * forcing writeback in loop.
-				 */
-				xas_unlock_irq(&xas);
-				filemap_flush(mapping);
-				result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
-				goto xa_unlocked;
-			} else if (folio_test_writeback(folio)) {
-				xas_unlock_irq(&xas);
-				result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
+		if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
+			xas_unlock_irq(&xas);
+			/* swap in or instantiate fallocated page */
+			if (shmem_get_folio(mapping->host, index, 0,
+					&folio, SGP_NOALLOC)) {
+				result = SCAN_FAIL;
 				goto xa_unlocked;
-			} else if (folio_trylock(folio)) {
-				folio_get(folio);
-				xas_unlock_irq(&xas);
-			} else {
-				result = SCAN_PAGE_LOCK;
-				goto xa_locked;
 			}
+			/* drain lru cache to help folio_isolate_lru() */
+			lru_add_drain();
+		} else if (folio_trylock(folio)) {
+			folio_get(folio);
+			xas_unlock_irq(&xas);
+		} else {
+			result = SCAN_PAGE_LOCK;
+			goto xa_locked;
 		}
 
 		/*
@@ -2041,17 +1996,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 			goto out_unlock;
 		}
 
-		if (!is_shmem && (folio_test_dirty(folio) ||
-				  folio_test_writeback(folio))) {
-			/*
-			 * khugepaged only works on read-only fd, so this
-			 * folio is dirty because it hasn't been flushed
-			 * since first write.
-			 */
-			result = SCAN_PAGE_DIRTY_OR_WRITEBACK;
-			goto out_unlock;
-		}
-
 		if (!folio_isolate_lru(folio)) {
 			result = SCAN_DEL_PAGE_LRU;
 			goto out_unlock;
@@ -2101,21 +2045,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		goto xa_unlocked;
 	}
 
-	if (!is_shmem) {
-		filemap_nr_thps_inc(mapping);
-		/*
-		 * Paired with the fence in do_dentry_open() -> get_write_access()
-		 * to ensure i_writecount is up to date and the update to nr_thps
-		 * is visible. Ensures the page cache will be truncated if the
-		 * file is opened writable.
-		 */
-		smp_mb();
-		if (inode_is_open_for_write(mapping->host)) {
-			result = SCAN_FAIL;
-			filemap_nr_thps_dec(mapping);
-		}
-	}
-
 xa_locked:
 	xas_unlock_irq(&xas);
 xa_unlocked:
@@ -2224,12 +2153,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		xas_lock_irq(&xas);
 	}
 
-	if (is_shmem) {
-		lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR);
-		lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
-	} else {
-		lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR);
-	}
+	lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR);
+	lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
 	lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, HPAGE_PMD_NR);
 
 	/*
@@ -2240,8 +2165,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 	folio_mark_uptodate(new_folio);
 	folio_ref_add(new_folio, HPAGE_PMD_NR - 1);
 
-	if (is_shmem)
-		folio_mark_dirty(new_folio);
+	folio_mark_dirty(new_folio);
 	folio_add_lru(new_folio);
 
 	/* Join all the small entries into a single multi-index entry. */
@@ -2266,9 +2190,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		list_del(&folio->lru);
 		lruvec_stat_mod_folio(folio, NR_FILE_PAGES,
 				      -folio_nr_pages(folio));
-		if (is_shmem)
-			lruvec_stat_mod_folio(folio, NR_SHMEM,
-					      -folio_nr_pages(folio));
+		lruvec_stat_mod_folio(folio, NR_SHMEM, -folio_nr_pages(folio));
 		folio->mapping = NULL;
 		folio_clear_active(folio);
 		folio_clear_unevictable(folio);
@@ -2293,19 +2215,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		folio_putback_lru(folio);
 		folio_put(folio);
 	}
-	/*
-	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM
-	 * file only. This undo is not needed unless failure is
-	 * due to SCAN_COPY_MC.
-	 */
-	if (!is_shmem && result == SCAN_COPY_MC) {
-		filemap_nr_thps_dec(mapping);
-		/*
-		 * Paired with the fence in do_dentry_open() -> get_write_access()
-		 * to ensure the update to nr_thps is visible.
-		 */
-		smp_mb();
-	}
 
 	new_folio->mapping = NULL;
 
-- 
2.43.0

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by David Hildenbrand (Arm) 1 week, 5 days ago

On 3/23/26 20:06, Zi Yan wrote:
> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code.
> 
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
>  mm/khugepaged.c | 159 +++++++++++-------------------------------------
>  1 file changed, 34 insertions(+), 125 deletions(-)
> 
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index b2ac28ddd480..39f0b8959535 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
>  	int nr_none = 0;
>  	bool is_shmem = shmem_file(file);
>  
> -	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
> +	VM_WARN_ON_ONCE(!is_shmem);

Oh, but if that means that khugepaged cannot collapse large folios in
FSes anymore, then this is the wrong approach I suppose?

I would have assumed that we would now collapse any files that support
large folios (of PMD size), rather than stop collapsing entirely.

-- 
Cheers,

David

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote:

> On 3/23/26 20:06, Zi Yan wrote:
>> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>  mm/khugepaged.c | 159 +++++++++++-------------------------------------
>>  1 file changed, 34 insertions(+), 125 deletions(-)
>>
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index b2ac28ddd480..39f0b8959535 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
>>  	int nr_none = 0;
>>  	bool is_shmem = shmem_file(file);
>>
>> -	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>> +	VM_WARN_ON_ONCE(!is_shmem);
>
> Oh, but if that means that khugepaged cannot collapse large folios in
> FSes anymore, then this is the wrong approach I suppose?
>
> I would have assumed that we would now collapse for any files that
> support large folios (in PMD size), not stopping to collapse entirely.

My understanding is that collapse_file() is only used for
READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it,
I can replace the IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) check with
mapping_large_folio_support().

Best Regards,
Yan, Zi

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by David Hildenbrand (Arm) 1 week, 5 days ago

On 3/23/26 20:47, Zi Yan wrote:
> On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote:
> 
>> On 3/23/26 20:06, Zi Yan wrote:
>>> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>>  mm/khugepaged.c | 159 +++++++++++-------------------------------------
>>>  1 file changed, 34 insertions(+), 125 deletions(-)
>>>
>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>> index b2ac28ddd480..39f0b8959535 100644
>>> --- a/mm/khugepaged.c
>>> +++ b/mm/khugepaged.c
>>> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
>>>  	int nr_none = 0;
>>>  	bool is_shmem = shmem_file(file);
>>>
>>> -	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>>> +	VM_WARN_ON_ONCE(!is_shmem);
>>
>> Oh, but if that means that khugepaged cannot collapse large folios in
>> FSes anymore, then this is the wrong approach I suppose?
>>
>> I would have assumed that we would now collapse for any files that
>> support large folios (in PMD size), not stopping to collapse entirely.
> 
> My understanding is that collapse_file() is only used for
> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it,
> I can replace IS_ENABLE with mapping_large_folio_support().

Otherwise we'd be losing support for THP collapse in files? We'd have to
cross fingers that readahead gives us some.

So we have to be a bit careful here. We want khugepaged to collapse THPs
in filesystems that support large folios even without READ_ONLY_THP_FOR_FS.

-- 
Cheers,

David

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote:

> On 3/23/26 20:47, Zi Yan wrote:
>> On 23 Mar 2026, at 15:41, David Hildenbrand (Arm) wrote:
>>
>>> On 3/23/26 20:06, Zi Yan wrote:
>>>> READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>>  mm/khugepaged.c | 159 +++++++++++-------------------------------------
>>>>  1 file changed, 34 insertions(+), 125 deletions(-)
>>>>
>>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>>> index b2ac28ddd480..39f0b8959535 100644
>>>> --- a/mm/khugepaged.c
>>>> +++ b/mm/khugepaged.c
>>>> @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
>>>>  	int nr_none = 0;
>>>>  	bool is_shmem = shmem_file(file);
>>>>
>>>> -	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>>>> +	VM_WARN_ON_ONCE(!is_shmem);
>>>
>>> Oh, but if that means that khugepaged cannot collapse large folios in
>>> FSes anymore, then this is the wrong approach I suppose?
>>>
>>> I would have assumed that we would now collapse for any files that
>>> support large folios (in PMD size), not stopping to collapse entirely.
>>
>> My understanding is that collapse_file() is only used for
>> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it,
>> I can replace IS_ENABLE with mapping_large_folio_support().
>
> Otherwise we'd be losing support for THP collapse in files? We'd have to
> cross fingers that readahead gives us some.
>
> So we have to be a bit careful here. We want khugepaged to collapse THPs
> in filesystems that support large folios even without READ_ONLY_THP_FOR_FS.

Based on [1], collapse_file() only works on read-only fds, so it does
not cover all files in general. If we do what you suggested, the function
will collapse read-only folios from FSes with large folio support.

BTW, has anyone used/relied on collapse_file() on any FS with large folio
support?

[1] https://elixir.bootlin.com/linux/v6.19.9/source/mm/khugepaged.c#L1956


Best Regards,
Yan, Zi

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by David Hildenbrand (Arm) 1 week, 5 days ago

On 3/23/26 20:59, Zi Yan wrote:
> On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote:
> 
>> On 3/23/26 20:47, Zi Yan wrote:
>>>
>>>
>>> My understanding is that collapse_file() is only used for
>>> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it,
>>> I can replace IS_ENABLE with mapping_large_folio_support().
>>
>> Otherwise we'd be losing support for THP collapse in files? We'd have to
>> cross fingers that readahead gives us some.
>>
>> So we have to be a bit careful here. We want khugepaged to collapse THPs
>> in filesystems that support large folios even without READ_ONLY_THP_FOR_FS.
> 
> Based on[1], collapse_file() only works on read-only fd. So it is not
> as general as all files. If we do what you suggested, the function
> will be collapse read-only folios from FSes with large folio support.

Right, I think that's the first step to keep the existing use case
working. Long term, I suspect we might want to extend that to writable
files etc.

> 
> BTW, has anyone used/relied on collapse_file() on any FS with large folio
> support?

I assume so, because for executables you would have to be lucky to get a
PMD THP? So I don't see the non-khugepaged large folio support on par
with khugepaged support.

-- 
Cheers,

David

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote:

> On 3/23/26 20:59, Zi Yan wrote:
>> On 23 Mar 2026, at 15:53, David Hildenbrand (Arm) wrote:
>>
>>> On 3/23/26 20:47, Zi Yan wrote:
>>>>
>>>>
>>>> My understanding is that collapse_file() is only used for
>>>> READ_ONLY_THP_FOR_FS. If FSes with large folio support also use it,
>>>> I can replace IS_ENABLE with mapping_large_folio_support().
>>>
>>> Otherwise we'd be losing support for THP collapse in files? We'd have to
>>> cross fingers that readahead gives us some.
>>>
>>> So we have to be a bit careful here. We want khugepaged to collapse THPs
>>> in filesystems that support large folios even without READ_ONLY_THP_FOR_FS.
>>
>> Based on[1], collapse_file() only works on read-only fd. So it is not
>> as general as all files. If we do what you suggested, the function
>> will be collapse read-only folios from FSes with large folio support.
>
> Right, I think that's the first step to keep the existing use case
> working. Long term, I suspect we might want to extend that to writable
> files etc.
>
>>
>> BTW, has anyone used/relied on collapse_file() on any FS with large folio
>> support?
>
> I assume so, because for executables you would have to be lucky to get a
> PMD THP? So I don't see the non-khugepaged large folio support on par
> with khugepaged support.

So it is more like turning on READ_ONLY_THP_FOR_FS by default for
FSes with large folio support instead of removing it.

OK, I will give it another try.


Best Regards,
Yan, Zi

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Matthew Wilcox 1 week, 5 days ago

On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote:
> On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote:
> > I assume so, because for executables you would have to be lucky to get a
> > PMD THP? So I don't see the non-khugepaged large folio support on par
> > with khugepaged support.

Not necessarily that lucky; if you set VM_HUGEPAGE,
do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
On busy database servers (and is there any other kind?), khugepaged
takes too long to run and find opportunities to collapse text pages.
Like, days.

> It is more like turning on READ_ONLY_THP_FOR_FS by default for
> FS with large folio support instead of removing it.
> 
> OK, I will give it another try.

I think the test needs to be:

	if (mapping_max_folio_order(mapping) >= PMD_ORDER)

as there can be cases of filesystems which support up to, say, 64KiB,
but not all the way up to 2MiB.  I disapprove of this situation, but
this is where we are right now.

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by David Hildenbrand (Arm) 1 week, 5 days ago

On 3/23/26 22:02, Matthew Wilcox wrote:
> On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote:
>> On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote:
>>> I assume so, because for executables you would have to be lucky to get a
>>> PMD THP? So I don't see the non-khugepaged large folio support on par
>>> with khugepaged support.
> 
> Not necessarily that lucky; if you set VM_HUGEPAGE,
> do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
> On busy database servers (and is there any other kind?), khugepaged
> takes too long to run and find opportunities to collapse text pages.
> Like, days.

Yes, in particular given that the default khugepaged settings are awful.

> 
>> It is more like turning on READ_ONLY_THP_FOR_FS by default for
>> FS with large folio support instead of removing it.
>>
>> OK, I will give it another try.
> 
> I think the test needs to be:
> 
> 	if (mapping_max_folio_order(mapping) >= PMD_ORDER)
> 
> as there can be cases of filesystems which support up to, say, 64KiB,
> but not all the way up to 2MiB.  I disapprove of this situation, but
> this is where we are right now.

Right, that's what I had in mind.

-- 
Cheers,

David

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote:

> On 3/23/26 22:02, Matthew Wilcox wrote:
>> On Mon, Mar 23, 2026 at 04:24:26PM -0400, Zi Yan wrote:
>>> On 23 Mar 2026, at 16:13, David Hildenbrand (Arm) wrote:
>>>> I assume so, because for executables you would have to be lucky to get a
>>>> PMD THP? So I don't see the non-khugepaged large folio support on par
>>>> with khugepaged support.
>>
>> Not necessarily that lucky; if you set VM_HUGEPAGE,
>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
>> On busy database servers (and is there any other kind?), khugepaged
>> takes too long to run and find opportunities to collapse text pages.
>> Like, days.
>
> Yes, in particular given that the default khugepaged settings are awful.
>
>>
>>> It is more like turning on READ_ONLY_THP_FOR_FS by default for
>>> FS with large folio support instead of removing it.
>>>
>>> OK, I will give it another try.
>>
>> I think the test needs to be:
>>
>> 	if (mapping_max_folio_order(mapping) >= PMD_ORDER)

This is very helpful, since I was thinking about using
mapping_large_folio_support().

>>
>> as there can be cases of filesystems which support up to, say, 64KiB,
>> but not all the way up to 2MiB.  I disapprove of this situation, but
>> this is where we are right now.
>
> Right, that's what I had in mind.

Does Nico’s mTHP support for khugepaged include changes to collapse_file()?
That might change the above test.

Best Regards,
Yan, Zi

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by David Hildenbrand (Arm) 1 week, 5 days ago

On 3/23/26 22:11, Zi Yan wrote:
> On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote:
> 
>> On 3/23/26 22:02, Matthew Wilcox wrote:
>>>
>>> Not necessarily that lucky; if you set VM_HUGEPAGE,
>>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
>>> On busy database servers (and is there any other kind?), khugepaged
>>> takes too long to run and find opportunities to collapse text pages.
>>> Like, days.
>>
>> Yes, in particular given that the default khugepaged settings are awful.
>>
>>>
>>>
>>> I think the test needs to be:
>>>
>>> 	if (mapping_max_folio_order(mapping) >= PMD_ORDER)
> 
> This is very helpful, since I was thinking about using
> mapping_large_folio_support().
> 
>>>
>>> as there can be cases of filesystems which support up to, say, 64KiB,
>>> but not all the way up to 2MiB.  I disapprove of this situation, but
>>> this is where we are right now.
>>
>> Right, that's what I had in mind.
> 
> Does Nico’s mTHP support for khugepaged include changes to collapse_file()?
> That might change the above test.

At least not regarding adding support for other folio sizes. Baolin
(IIRC) had a version for shmem support, but that will come after Nico's
series is merged.

-- 
Cheers,

David

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Zi Yan 1 week, 5 days ago

On 23 Mar 2026, at 17:21, David Hildenbrand (Arm) wrote:

> On 3/23/26 22:11, Zi Yan wrote:
>> On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote:
>>
>>> On 3/23/26 22:02, Matthew Wilcox wrote:
>>>>
>>>> Not necessarily that lucky; if you set VM_HUGEPAGE,
>>>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
>>>> On busy database servers (and is there any other kind?), khugepaged
>>>> takes too long to run and find opportunities to collapse text pages.
>>>> Like, days.
>>>
>>> Yes, in particular given that the default khugepaged settings are awful.
>>>
>>>>
>>>>
>>>> I think the test needs to be:
>>>>
>>>> 	if (mapping_max_folio_order(mapping) >= PMD_ORDER)
>>
>> This is very helpful, since I was thinking about using
>> mapping_large_folio_support().
>>
>>>>
>>>> as there can be cases of filesystems which support up to, say, 64KiB,
>>>> but not all the way up to 2MiB.  I disapprove of this situation, but
>>>> this is where we are right now.
>>>
>>> Right, that's what I had in mind.
>>
>> Does Nico’s mTHP support for khugepaged include changes to collapse_file()?
>> That might change the above test.
>
> At least not regarding adding support for other folio sizes. Baolin
> (IIRC) had a version for shmem support, but that will come after Nico's
> series was merged.

Great. That makes my life easier. Thanks.

Best Regards,
Yan, Zi

Re: [RFC PATCH 02/11] mm/khugepaged: remove READ_ONLY_THP_FOR_FS code in collapse_file()

Posted by Baolin Wang 1 week, 5 days ago


On 3/24/26 8:12 AM, Zi Yan wrote:
> On 23 Mar 2026, at 17:21, David Hildenbrand (Arm) wrote:
> 
>> On 3/23/26 22:11, Zi Yan wrote:
>>> On 23 Mar 2026, at 17:05, David Hildenbrand (Arm) wrote:
>>>
>>>> On 3/23/26 22:02, Matthew Wilcox wrote:
>>>>>
>>>>> Not necessarily that lucky; if you set VM_HUGEPAGE,
>>>>> do_sync_mmap_readahead() will allocate PMD-sized folios automatically.
>>>>> On busy database servers (and is there any other kind?), khugepaged
>>>>> takes too long to run and find opportunities to collapse text pages.
>>>>> Like, days.
>>>>
>>>> Yes, in particular given that the default khugepaged settings are awful.
>>>>
>>>>>
>>>>>
>>>>> I think the test needs to be:
>>>>>
>>>>> 	if (mapping_max_folio_order(mapping) >= PMD_ORDER)
>>>
>>> This is very helpful, since I was thinking about using
>>> mapping_large_folio_support().
>>>
>>>>>
>>>>> as there can be cases of filesystems which support up to, say, 64KiB,
>>>>> but not all the way up to 2MiB.  I disapprove of this situation, but
>>>>> this is where we are right now.
>>>>
>>>> Right, that's what I had in mind.
>>>
>>> Does Nico’s mTHP support for khugepaged include changes to collapse_file()?
>>> That might change the above test.
>>
>> At least not regarding adding support for other folio sizes. Baolin
>> (IIRC) had a version for shmem support, but that will come after Nico's
>> series was merged.

Right. Here is the link to the RFC version:

https://lore.kernel.org/all/cover.1755677674.git.baolin.wang@linux.alibaba.com/

Moreover, I have further updated my local tree based on Nico's series. I 
will send out the new version once his series is merged.