folio_split() splits a large folio in the same way as the buddy allocator
splits a large free page for allocation. The purpose is to minimize the
number of folios after the split. For example, if a user wants to free the
3rd subpage in an order-9 folio, folio_split() will split the order-9 folio
as:
O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon,
since anon folios do not support order-1 yet.
-----------------------------------------------------------------
|   |   |   |   |     |   |     |                               |
|O-0|O-0|O-0|O-0| O-2 |...| O-7 |              O-8              |
|   |   |   |   |     |   |     |                               |
-----------------------------------------------------------------

O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache
---------------------------------------------------------------
|     |   |   |     |   |     |                               |
| O-1 |O-0|O-0| O-2 |...| O-7 |              O-8              |
|     |   |   |     |   |     |                               |
---------------------------------------------------------------
It generates fewer folios (i.e., 11 or 10) than the existing page split
approach, which splits the order-9 folio into 512 order-0 folios. It also
reduces the number of new xa_nodes needed during a pagecache folio split
from 8 to 1, potentially decreasing the folio split failure rate due to
memory constraints.

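The layouts above follow mechanically from the buddy-style halving. Below
is a minimal userspace C sketch (illustrative only, not kernel code; all
names here are invented) that reproduces both example layouts:

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdlib.h>

	/* One resulting folio: index of its first subpage, plus its order. */
	struct piece {
		unsigned long start;
		unsigned int order;
	};

	/*
	 * Buddy-style split: repeatedly halve the piece that still contains
	 * @target, keeping the other half intact, until the piece holding
	 * @target reaches @new_order. Writes at most order - new_order + 1
	 * pieces to @out and returns how many were written.
	 */
	static int split_orders(unsigned int order, unsigned int new_order,
				unsigned long target, struct piece *out)
	{
		unsigned long start = 0;
		int n = 0;

		while (order > new_order) {
			unsigned long half = 1UL << --order;

			if (target < start + half) {
				/* target in lower half: keep the upper half */
				out[n++] = (struct piece){ start + half, order };
			} else {
				/* target in upper half: keep the lower half */
				out[n++] = (struct piece){ start, order };
				start += half;
			}
		}
		out[n++] = (struct piece){ start, new_order }; /* target folio */
		return n;
	}

	static int cmp_start(const void *a, const void *b)
	{
		const struct piece *pa = a, *pb = b;

		return (pa->start > pb->start) - (pa->start < pb->start);
	}

	static void show(bool anon)
	{
		struct piece p[10];
		int i, n = split_orders(9, 0, 2, p); /* free the 3rd subpage */

		qsort(p, n, sizeof(p[0]), cmp_start);
		printf("%s: ", anon ? "anon" : "pagecache");
		for (i = 0; i < n; i++) {
			if (anon && p[i].order == 1) /* no order-1 anon folio */
				printf("O-0, O-0");
			else
				printf("O-%u", p[i].order);
			printf(i == n - 1 ? "\n" : ", ");
		}
	}

	int main(void)
	{
		show(true);   /* 11 folios: O-0 x4, O-2 .. O-8 */
		show(false);  /* 10 folios: O-1, O-0 x2, O-2 .. O-8 */
		return 0;
	}

The same halving also explains the xa_node figure: with XA_CHUNK_SHIFT
being 6, only the one bottom-level node covering the 64 slots around the
target has to change granularity, whereas a uniform split to order-0 has
to populate all 2^(9-6) = 8 bottom-level nodes at once.
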
folio_split() and the existing split_huge_page_to_list_to_order() share
the folio unmapping and remapping code in __folio_split() and the common
backend split code in __split_unmapped_folio(), using the
uniform_split variable to distinguish their operations.

uniform_split_supported() and non_uniform_split_supported() are added
to factor out the check code; they will be used outside __folio_split()
in the following commit.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/huge_memory.c | 137 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 100 insertions(+), 37 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 21ebe2dec5a4..400dfe8a6e60 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3853,12 +3853,68 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
return ret;
}
+static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+ bool warns)
+{
+ /* order-1 is not supported for anonymous THP. */
+ if (folio_test_anon(folio) && new_order == 1) {
+ VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
+ return false;
+ }
+
+ /*
+ * No split if the file system does not support large folio.
+ * Note that we might still have THPs in such mappings due to
+ * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+ * does not actually support large folios properly.
+ */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split file folio to non-0 order");
+ return false;
+ }
+
+ /* Only swapping a whole PMD-mapped folio is supported */
+ if (folio_test_swapcache(folio)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split swapcache folio to non-0 order");
+ return false;
+ }
+
+ return true;
+}
+
+/* See comments in non_uniform_split_supported() */
+static bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+ bool warns)
+{
+ if (folio_test_anon(folio) && new_order == 1) {
+ VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
+ return false;
+ }
+
+ if (new_order) {
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split file folio to non-0 order");
+ return false;
+ }
+ if (folio_test_swapcache(folio)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split swapcache folio to non-0 order");
+ return false;
+ }
+ }
+ return true;
+}
+
static int __folio_split(struct folio *folio, unsigned int new_order,
- struct page *page, struct list_head *list)
+ struct page *page, struct list_head *list, bool uniform_split)
{
struct deferred_split *ds_queue = get_deferred_split_queue(folio);
- /* reset xarray order to new order after split */
- XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
+ XA_STATE(xas, &folio->mapping->i_pages, folio->index);
bool is_anon = folio_test_anon(folio);
struct address_space *mapping = NULL;
struct anon_vma *anon_vma = NULL;
@@ -3873,29 +3929,11 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (new_order >= folio_order(folio))
return -EINVAL;
- if (is_anon) {
- /* order-1 is not supported for anonymous THP. */
- if (new_order == 1) {
- VM_WARN_ONCE(1, "Cannot split to order-1 folio");
- return -EINVAL;
- }
- } else if (new_order) {
- /*
- * No split if the file system does not support large folio.
- * Note that we might still have THPs in such mappings due to
- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
- * does not actually support large folios properly.
- */
- if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
- !mapping_large_folio_support(folio->mapping)) {
- VM_WARN_ONCE(1,
- "Cannot split file folio to non-0 order");
- return -EINVAL;
- }
- }
+ if (uniform_split && !uniform_split_supported(folio, new_order, true))
+ return -EINVAL;
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio) && new_order)
+ if (!uniform_split &&
+ !non_uniform_split_supported(folio, new_order, true))
return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
@@ -3952,10 +3990,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
goto out;
}
- xas_split_alloc(&xas, folio, folio_order(folio), gfp);
- if (xas_error(&xas)) {
- ret = xas_error(&xas);
- goto out;
+ if (uniform_split) {
+ xas_set_order(&xas, folio->index, new_order);
+ xas_split_alloc(&xas, folio, folio_order(folio), gfp);
+ if (xas_error(&xas)) {
+ ret = xas_error(&xas);
+ goto out;
+ }
}
anon_vma = NULL;
@@ -4020,7 +4061,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (mapping) {
int nr = folio_nr_pages(folio);
- xas_split(&xas, folio, folio_order(folio));
if (folio_test_pmd_mappable(folio) &&
new_order < HPAGE_PMD_ORDER) {
if (folio_test_swapbacked(folio)) {
@@ -4034,12 +4074,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
}
}
- if (is_anon) {
- mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
- mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order));
- }
- __split_huge_page(page, list, end, new_order);
- ret = 0;
+ ret = __split_unmapped_folio(page_folio(page), new_order,
+ page, list, end, &xas, mapping, uniform_split);
} else {
spin_unlock(&ds_queue->split_queue_lock);
fail:
@@ -4117,7 +4153,34 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
{
struct folio *folio = page_folio(page);
- return __folio_split(folio, new_order, page, list);
+ return __folio_split(folio, new_order, page, list, true);
+}
+
+/*
+ * folio_split: split a folio at @page to a @new_order folio
+ * @folio: folio to split
+ * @new_order: the order of the new folio
+ * @page: a page within the new folio
+ *
+ * Return: 0 on success, <0 on failure (if -ENOMEM is returned, @folio might
+ * be split, but not to @new_order; the caller needs to check)
+ *
+ * It has the same prerequisites and returns as
+ * split_huge_page_to_list_to_order().
+ *
+ * Split a folio at @page to a @new_order folio, leaving the
+ * remaining subpages of the original folio as large as possible. For example,
+ * split an order-9 folio at its third order-3 subpage to an order-3 folio.
+ * There are 2^6 = 64 order-3 subpages in an order-9 folio, and the result will
+ * be a set of folios with different orders; the new folio is marked with braces:
+ * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8].
+ *
+ * After the split, @folio is left locked for the caller.
+ */
+int folio_split(struct folio *folio, unsigned int new_order,
+ struct page *page, struct list_head *list)
+{
+ return __folio_split(folio, new_order, page, list, false);
}
int min_order_for_split(struct folio *folio)
--
2.47.2
On 11.02.25 16:50, Zi Yan wrote:
> folio_split() splits a large folio in the same way as buddy allocator
> splits a large free page for allocation. The purpose is to minimize the
> number of folios after the split. For example, if user wants to free the
> 3rd subpage in a order-9 folio, folio_split() will split the order-9 folio
> as:
> O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon,
> since anon folio does not support order-1 yet.
> -----------------------------------------------------------------
> |   |   |   |   |     |   |     |                               |
> |O-0|O-0|O-0|O-0| O-2 |...| O-7 |              O-8              |
> |   |   |   |   |     |   |     |                               |
> -----------------------------------------------------------------
>
> O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache
> ---------------------------------------------------------------
> |     |   |   |     |   |     |                               |
> | O-1 |O-0|O-0| O-2 |...| O-7 |              O-8              |
> |     |   |   |     |   |     |                               |
> ---------------------------------------------------------------
>
> It generates fewer folios (i.e., 11 or 10) than existing page split
> approach, which splits the order-9 to 512 order-0 folios. It also reduces
> the number of new xa_node needed during a pagecache folio split from
> 8 to 1, potentially decreasing the folio split failure rate due to memory
> constraints.
>
> folio_split() and existing split_huge_page_to_list_to_order() share
> the folio unmapping and remapping code in __folio_split() and the common
> backend split code in __split_unmapped_folio() using
> uniform_split variable to distinguish their operations.
>
> uniform_split_supported() and non_uniform_split_supported() are added
> to factor out check code and will be used outside __folio_split() in the
> following commit.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> mm/huge_memory.c | 137 ++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 100 insertions(+), 37 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 21ebe2dec5a4..400dfe8a6e60 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3853,12 +3853,68 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
> return ret;
> }
>
> +static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
> + bool warns)
> +{
> + /* order-1 is not supported for anonymous THP. */
> + if (folio_test_anon(folio) && new_order == 1) {
> + VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
> + return false;
> + }
> +
> + /*
> + * No split if the file system does not support large folio.
> + * Note that we might still have THPs in such mappings due to
> + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
> + * does not actually support large folios properly.
> + */
> + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> + !mapping_large_folio_support(folio->mapping)) {
In this (and a similar case below), you need
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!folio_test_anon(folio) &&
!mapping_large_folio_support(folio->mapping)) {
Otherwise mapping_large_folio_support() is unhappy:
[root@localhost mm]# ./split_huge_page_test
TAP version 13
1..20
ok 1 Split zero filled huge pages successful
ok 2 Split huge pages to order 0 successful
[ 144.936764][T15389] ------------[ cut here ]------------
[ 144.937948][T15389] Anonymous mapping always supports large folio
[ 144.938164][T15389] WARNING: CPU: 5 PID: 15389 at ./include/linux/pagemap.h:494 uniform_split_supported+0x270/0x290
[ 144.941442][T15389] Modules linked in:
[ 144.942212][T15389] CPU: 5 UID: 0 PID: 15389 Comm: split_huge_page Not tainted 6.14.0-rc2-00200-gdcbc194183fd #153
[ 144.944188][T15389] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014
[ 144.945934][T15389] RIP: 0010:uniform_split_supported+0x270/0x290
[ 144.947144][T15389] Code: ff 89 de e8 c2 20 ca ff 84 db 0f 85 47 fe ff ff e8 05 26 ca ff c6 05 f6 c2 22 06 01 90 48 c7 c7 18 44 fa 86 e8 31 2a ac ff 90 <0f> 0b 90 90 e9 24 fe ff ff e8 e2 25 ca ff 48 c7 c6 08 52 f7 86 48
[ 144.950897][T15389] RSP: 0018:ffffc90022813990 EFLAGS: 00010286
[ 144.952058][T15389] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8120ed77
[ 144.953559][T15389] RDX: ffff8881326f3880 RSI: ffffffff8120ed8a RDI: ffff8881326f3880
[ 144.955045][T15389] RBP: ffffea00062a0000 R08: 0000000000000001 R09: 0000000000000000
[ 144.956544][T15389] R10: 0000000000000000 R11: 0000000000000003 R12: 0000000000000001
[ 144.958043][T15389] R13: 0000000000000001 R14: ffff8881328b3951 R15: 0000000000000001
[ 144.959898][T15389] FS: 00007fb74cda4740(0000) GS:ffff888277b40000(0000) knlGS:0000000000000000
[ 144.961627][T15389] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 144.962886][T15389] CR2: 00007fb74ca00000 CR3: 000000013e072000 CR4: 0000000000750ef0
[ 144.964418][T15389] PKRU: 55555554
[ 144.965100][T15389] Call Trace:
[ 144.965746][T15389] <TASK>
[ 144.966331][T15389] ? uniform_split_supported+0x270/0x290
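
The warning comes from mapping_large_folio_support() itself, which must
not be called on anon mappings because folio->mapping carries the
anon_vma pointer (with PAGE_MAPPING_ANON set in its low bits) for
anonymous folios. Paraphrased from include/linux/pagemap.h around
6.14-rc2, for context only; the exact body varies by kernel version:

	static inline bool mapping_large_folio_support(struct address_space *mapping)
	{
		/* Callers must check folio_test_anon() before getting here. */
		VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
			     "Anonymous mapping always supports large folio");

		return mapping_max_folio_order(mapping) > 0;
	}
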
--
Cheers,
David / dhildenb
On 16 Feb 2025, at 5:32, David Hildenbrand wrote:
> On 11.02.25 16:50, Zi Yan wrote:
>> folio_split() splits a large folio in the same way as buddy allocator
>> splits a large free page for allocation. The purpose is to minimize the
>> number of folios after the split. For example, if user wants to free the
>> 3rd subpage in a order-9 folio, folio_split() will split the order-9 folio
>> as:
>> O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon,
>> since anon folio does not support order-1 yet.
>> -----------------------------------------------------------------
>> |   |   |   |   |     |   |     |                               |
>> |O-0|O-0|O-0|O-0| O-2 |...| O-7 |              O-8              |
>> |   |   |   |   |     |   |     |                               |
>> -----------------------------------------------------------------
>>
>> O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache
>> ---------------------------------------------------------------
>> |     |   |   |     |   |     |                               |
>> | O-1 |O-0|O-0| O-2 |...| O-7 |              O-8              |
>> |     |   |   |     |   |     |                               |
>> ---------------------------------------------------------------
>>
>> It generates fewer folios (i.e., 11 or 10) than existing page split
>> approach, which splits the order-9 to 512 order-0 folios. It also reduces
>> the number of new xa_node needed during a pagecache folio split from
>> 8 to 1, potentially decreasing the folio split failure rate due to memory
>> constraints.
>>
>> folio_split() and existing split_huge_page_to_list_to_order() share
>> the folio unmapping and remapping code in __folio_split() and the common
>> backend split code in __split_unmapped_folio() using
>> uniform_split variable to distinguish their operations.
>>
>> uniform_split_supported() and non_uniform_split_supported() are added
>> to factor out check code and will be used outside __folio_split() in the
>> following commit.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> mm/huge_memory.c | 137 ++++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 100 insertions(+), 37 deletions(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 21ebe2dec5a4..400dfe8a6e60 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -3853,12 +3853,68 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
>> return ret;
>> }
>> +static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
>> + bool warns)
>> +{
>> + /* order-1 is not supported for anonymous THP. */
>> + if (folio_test_anon(folio) && new_order == 1) {
>> + VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
>> + return false;
>> + }
>> +
>> + /*
>> + * No split if the file system does not support large folio.
>> + * Note that we might still have THPs in such mappings due to
>> + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
>> + * does not actually support large folios properly.
>> + */
>> + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
>> + !mapping_large_folio_support(folio->mapping)) {
>
> In this (and a similar case below), you need
>
> if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> !folio_test_anon(folio) &&
> !mapping_large_folio_support(folio->mapping)) {
>
> Otherwise mapping_large_folio_support() is unhappy:
>
Thanks. The patch below should fix it.

I am going to send V8, since:

1. there have been 4 fixes so far for V7, so a new series would make
review easier;

2. based on the discussion with you in the THP cabal meeting, to
convert split_huge_page*() to use __folio_split(), the current
__folio_split() interface becomes awkward. Two changes are needed:
a) use an in-folio offset instead of a struct page, since even in
truncate_inode_partial_folio() I needed to convert an in-folio offset
to a struct page to use my current interface;
b) split_huge_page*()'s caller might hold the page lock on a non-head
page, so an additional keep_lock_at_in_folio_offset is needed
to indicate which after-split folio should be kept locked after the
split is done (a rough prototype sketch follows below).
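
Purely to make (a) and (b) concrete, here is a hypothetical sketch of
what the reworked prototype could look like (parameter names are
placeholders, not from any posted patch):

	/* Hypothetical sketch only -- not from a posted patch. */
	int folio_split(struct folio *folio, unsigned int new_order,
			unsigned long in_folio_offset,		/* (a) */
			struct list_head *list,
			long keep_lock_at_in_folio_offset);	/* (b) */
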
From 8b2aa5432c8d726a1fb6ce74c971365650da9370 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@nvidia.com>
Date: Sun, 16 Feb 2025 09:01:29 -0500
Subject: [PATCH] mm/huge_memory: check folio_test_anon() before
mapping_large_folio_support()
Otherwise mapping_large_folio_support() complains.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/huge_memory.c | 48 ++++++++++++++++++++++++------------------------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87cb62c81bf3..deb16fe662c4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3629,20 +3629,19 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
bool warns)
{
- /* order-1 is not supported for anonymous THP. */
- if (folio_test_anon(folio) && new_order == 1) {
- VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
- return false;
- }
-
- /*
- * No split if the file system does not support large folio.
- * Note that we might still have THPs in such mappings due to
- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
- * does not actually support large folios properly.
- */
- if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ if (folio_test_anon(folio)) {
+ /* order-1 is not supported for anonymous THP. */
+ VM_WARN_ONCE(warns && new_order == 1,
+ "Cannot split to order-1 folio");
+ return new_order != 1;
+ } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * No split if the file system does not support large folio.
+ * Note that we might still have THPs in such mappings due to
+ * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+ * does not actually support large folios properly.
+ */
VM_WARN_ONCE(warns,
"Cannot split file folio to non-0 order");
return false;
@@ -3662,24 +3661,25 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
bool uniform_split_supported(struct folio *folio, unsigned int new_order,
bool warns)
{
- if (folio_test_anon(folio) && new_order == 1) {
- VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
- return false;
- }
-
- if (new_order) {
+ if (folio_test_anon(folio)) {
+ VM_WARN_ONCE(warns && new_order == 1,
+ "Cannot split to order-1 folio");
+ return new_order != 1;
+ } else if (new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
VM_WARN_ONCE(warns,
"Cannot split file folio to non-0 order");
return false;
}
- if (folio_test_swapcache(folio)) {
- VM_WARN_ONCE(warns,
- "Cannot split swapcache folio to non-0 order");
- return false;
- }
}
+
+ if (new_order && folio_test_swapcache(folio)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split swapcache folio to non-0 order");
+ return false;
+ }
+
return true;
}
--
2.47.2
--
Best Regards,
Yan, Zi
On 16 Feb 2025, at 9:17, Zi Yan wrote:
> On 16 Feb 2025, at 5:32, David Hildenbrand wrote:
>
>> On 11.02.25 16:50, Zi Yan wrote:
>>> folio_split() splits a large folio in the same way as buddy allocator
>>> splits a large free page for allocation. The purpose is to minimize the
>>> number of folios after the split. For example, if user wants to free the
>>> 3rd subpage in a order-9 folio, folio_split() will split the order-9 folio
>>> as:
>>> O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon,
>>> since anon folio does not support order-1 yet.
>>> -----------------------------------------------------------------
>>> |   |   |   |   |     |   |     |                               |
>>> |O-0|O-0|O-0|O-0| O-2 |...| O-7 |              O-8              |
>>> |   |   |   |   |     |   |     |                               |
>>> -----------------------------------------------------------------
>>>
>>> O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache
>>> ---------------------------------------------------------------
>>> |     |   |   |     |   |     |                               |
>>> | O-1 |O-0|O-0| O-2 |...| O-7 |              O-8              |
>>> |     |   |   |     |   |     |                               |
>>> ---------------------------------------------------------------
>>>
>>> It generates fewer folios (i.e., 11 or 10) than existing page split
>>> approach, which splits the order-9 to 512 order-0 folios. It also reduces
>>> the number of new xa_node needed during a pagecache folio split from
>>> 8 to 1, potentially decreasing the folio split failure rate due to memory
>>> constraints.
>>>
>>> folio_split() and existing split_huge_page_to_list_to_order() share
>>> the folio unmapping and remapping code in __folio_split() and the common
>>> backend split code in __split_unmapped_folio() using
>>> uniform_split variable to distinguish their operations.
>>>
>>> uniform_split_supported() and non_uniform_split_supported() are added
>>> to factor out check code and will be used outside __folio_split() in the
>>> following commit.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>> mm/huge_memory.c | 137 ++++++++++++++++++++++++++++++++++-------------
>>> 1 file changed, 100 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 21ebe2dec5a4..400dfe8a6e60 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -3853,12 +3853,68 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
>>> return ret;
>>> }
>>> +static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
>>> + bool warns)
>>> +{
>>> + /* order-1 is not supported for anonymous THP. */
>>> + if (folio_test_anon(folio) && new_order == 1) {
>>> + VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
>>> + return false;
>>> + }
>>> +
>>> + /*
>>> + * No split if the file system does not support large folio.
>>> + * Note that we might still have THPs in such mappings due to
>>> + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
>>> + * does not actually support large folios properly.
>>> + */
>>> + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
>>> + !mapping_large_folio_support(folio->mapping)) {
>>
>> In this (and a similar case below), you need
>>
>> if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
>> !folio_test_anon(folio) &&
>> !mapping_large_folio_support(folio->mapping)) {
>>
>> Otherwise mapping_large_folio_support() is unhappy:
>>
>
> Thanks. The patch below should fix it.
>
> I am going to send V8, since
> 1. there have been 4 fixes so far for V7, a new series would help people
> review;
>
> 2. based on the discussion with you in THP cabal meeting, to
> convert split_huge_page*() to use __folio_split(), the current
> __folio_split() interface becomes awkward. Two changes are needed:
> a) use in folio offset instead of struct page, since even in
> truncate_inode_partial_folio() I needed to convert in folio offset
> struct page to use my current interface;
> b) split_huge_page*()'s caller might hold the page lock at a non-head
> page, so an additional keep_lock_at_in_folio_offset is needed
> to indicate which after-split folio should be kept locked after
> split is done.
>
Hi Andrew,

I am planning to send V8 to collect all the fixup patches I have so far,
plus the one below, and to change the folio_split() interface and some
of the code. What is your preferred method?

1. you pick up the fixup below and I send a new set of patches to
change folio_split();

2. I collect a new V8 with all fixup patches and the folio_split() change.

For 1, the commit history might get messy due to my new folio_split()
change. For 2, the "Minimize xa_node allocation during xarray split" [1]
patchset depends on patch 1 of this series, which adds some extra work
for you to collect V8 (alternatively, I can send V8 without patch 1).

Let me know your thoughts. Thanks.
[1] https://lore.kernel.org/linux-mm/20250213034355.516610-1-ziy@nvidia.com/
>
> From 8b2aa5432c8d726a1fb6ce74c971365650da9370 Mon Sep 17 00:00:00 2001
> From: Zi Yan <ziy@nvidia.com>
> Date: Sun, 16 Feb 2025 09:01:29 -0500
> Subject: [PATCH] mm/huge_memory: check folio_test_anon() before
> mapping_large_folio_support()
>
> Otherwise mapping_large_folio_support() complains.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> mm/huge_memory.c | 48 ++++++++++++++++++++++++------------------------
> 1 file changed, 24 insertions(+), 24 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 87cb62c81bf3..deb16fe662c4 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3629,20 +3629,19 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
> bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
> bool warns)
> {
> - /* order-1 is not supported for anonymous THP. */
> - if (folio_test_anon(folio) && new_order == 1) {
> - VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
> - return false;
> - }
> -
> - /*
> - * No split if the file system does not support large folio.
> - * Note that we might still have THPs in such mappings due to
> - * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
> - * does not actually support large folios properly.
> - */
> - if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> + if (folio_test_anon(folio)) {
> + /* order-1 is not supported for anonymous THP. */
> + VM_WARN_ONCE(warns && new_order == 1,
> + "Cannot split to order-1 folio");
> + return new_order != 1;
> + } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> !mapping_large_folio_support(folio->mapping)) {
> + /*
> + * No split if the file system does not support large folio.
> + * Note that we might still have THPs in such mappings due to
> + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
> + * does not actually support large folios properly.
> + */
> VM_WARN_ONCE(warns,
> "Cannot split file folio to non-0 order");
> return false;
> @@ -3662,24 +3661,25 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
> bool uniform_split_supported(struct folio *folio, unsigned int new_order,
> bool warns)
> {
> - if (folio_test_anon(folio) && new_order == 1) {
> - VM_WARN_ONCE(warns, "Cannot split to order-1 folio");
> - return false;
> - }
> -
> - if (new_order) {
> + if (folio_test_anon(folio)) {
> + VM_WARN_ONCE(warns && new_order == 1,
> + "Cannot split to order-1 folio");
> + return new_order != 1;
> + } else if (new_order) {
> if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> !mapping_large_folio_support(folio->mapping)) {
> VM_WARN_ONCE(warns,
> "Cannot split file folio to non-0 order");
> return false;
> }
> - if (folio_test_swapcache(folio)) {
> - VM_WARN_ONCE(warns,
> - "Cannot split swapcache folio to non-0 order");
> - return false;
> - }
> }
> +
> + if (new_order && folio_test_swapcache(folio)) {
> + VM_WARN_ONCE(warns,
> + "Cannot split swapcache folio to non-0 order");
> + return false;
> + }
> +
> return true;
> }
>
> --
> 2.47.2
>
>
>
> --
> Best Regards,
> Yan, Zi
--
Best Regards,
Yan, Zi
On Mon, 17 Feb 2025 10:22:44 -0500 Zi Yan <ziy@nvidia.com> wrote:

> > Thanks. The patch below should fix it.
> >
> > I am going to send V8, since:
> >
> > 1. there have been 4 fixes so far for V7, so a new series would make
> > review easier;
> >
> > 2. based on the discussion with you in the THP cabal meeting, to
> > convert split_huge_page*() to use __folio_split(), the current
> > __folio_split() interface becomes awkward. Two changes are needed:
> > a) use an in-folio offset instead of a struct page, since even in
> > truncate_inode_partial_folio() I needed to convert an in-folio offset
> > to a struct page to use my current interface;
> > b) split_huge_page*()'s caller might hold the page lock on a non-head
> > page, so an additional keep_lock_at_in_folio_offset is needed
> > to indicate which after-split folio should be kept locked after the
> > split is done.
>
> Hi Andrew,
>
> I am planning to send V8 to collect all the fixup patches I have so far,
> plus the one below, and to change the folio_split() interface and some
> of the code. What is your preferred method?
>
> 1. you pick up the fixup below and I send a new set of patches to
> change folio_split();
>
> 2. I collect a new V8 with all fixup patches and the folio_split() change.
>
> For 1, the commit history might get messy due to my new folio_split()
> change. For 2, the "Minimize xa_node allocation during xarray split" [1]
> patchset depends on patch 1 of this series, which adds some extra work
> for you to collect V8 (alternatively, I can send V8 without patch 1).

We're only at -rc3, so I'll remove both series from mm.git. Please
fully resend both series against mm-unstable?
On 17 Feb 2025, at 23:12, Andrew Morton wrote:

> On Mon, 17 Feb 2025 10:22:44 -0500 Zi Yan <ziy@nvidia.com> wrote:
>
>>> Thanks. The patch below should fix it.
>>>
>>> I am going to send V8, since:
>>>
>>> 1. there have been 4 fixes so far for V7, so a new series would make
>>> review easier;
>>>
>>> 2. based on the discussion with you in the THP cabal meeting, to
>>> convert split_huge_page*() to use __folio_split(), the current
>>> __folio_split() interface becomes awkward. Two changes are needed:
>>> a) use an in-folio offset instead of a struct page, since even in
>>> truncate_inode_partial_folio() I needed to convert an in-folio offset
>>> to a struct page to use my current interface;
>>> b) split_huge_page*()'s caller might hold the page lock on a non-head
>>> page, so an additional keep_lock_at_in_folio_offset is needed
>>> to indicate which after-split folio should be kept locked after the
>>> split is done.
>>
>> Hi Andrew,
>>
>> I am planning to send V8 to collect all the fixup patches I have so far,
>> plus the one below, and to change the folio_split() interface and some
>> of the code. What is your preferred method?
>>
>> 1. you pick up the fixup below and I send a new set of patches to
>> change folio_split();
>>
>> 2. I collect a new V8 with all fixup patches and the folio_split() change.
>>
>> For 1, the commit history might get messy due to my new folio_split()
>> change. For 2, the "Minimize xa_node allocation during xarray split" [1]
>> patchset depends on patch 1 of this series, which adds some extra work
>> for you to collect V8 (alternatively, I can send V8 without patch 1).
>
> We're only at -rc3, so I'll remove both series from mm.git. Please
> fully resend both series against mm-unstable?

Got it.

Best Regards,
Yan, Zi