[PATCH v2 10/11] mm: thp: always enable mTHP support

Posted by Luiz Capitulino 1 day, 5 hours ago
If PMD-sized pages are not supported on an architecture (i.e. the
arch implements arch_has_pmd_leaves() and it returns false), then the
current code disables all THP, including mTHP.

This commit fixes this by allowing mTHP to be enabled on all archs.
When PMD-sized pages are not supported, the PMD-sized sysfs entry won't
be created and PMD-sized mappings will be disallowed at page-fault time.

Similarly, this commit implements the following changes for shmem:

 - In shmem_allowable_huge_orders(): drop the pgtable_has_pmd_leaves()
   check so that mTHP sizes are considered
 - In shmem_alloc_and_add_folio(): don't consider PMD and PUD orders
   when PMD-sized pages are not supported by the CPU

Signed-off-by: Luiz Capitulino <luizcap@redhat.com>
---
 mm/huge_memory.c | 11 +++++++----
 mm/shmem.c       |  4 +++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1e5ea2e47f79..882331592928 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -115,6 +115,9 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	else
 		supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
 
+	if (!pgtable_has_pmd_leaves())
+		supported_orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
+
 	orders &= supported_orders;
 	if (!orders)
 		return 0;
@@ -122,7 +125,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	if (!vma->vm_mm)		/* vdso */
 		return 0;
 
-	if (!pgtable_has_pmd_leaves() || vma_thp_disabled(vma, vm_flags, forced_collapse))
+	if (vma_thp_disabled(vma, vm_flags, forced_collapse))
 		return 0;
 
 	/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
@@ -806,6 +809,9 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
 	}
 
 	orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
+	if (!pgtable_has_pmd_leaves())
+		orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
+
 	order = highest_order(orders);
 	while (orders) {
 		thpsize = thpsize_create(order, *hugepage_kobj);
@@ -905,9 +911,6 @@ static int __init hugepage_init(void)
 	int err;
 	struct kobject *hugepage_kobj;
 
-	if (!pgtable_has_pmd_leaves())
-		return -EINVAL;
-
 	/*
 	 * hugepages can't be allocated by the buddy allocator
 	 */
diff --git a/mm/shmem.c b/mm/shmem.c
index 1c98e84667a4..cb325d1e2d1e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1827,7 +1827,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
 	unsigned int global_orders;
 
-	if (!pgtable_has_pmd_leaves() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
+	if (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))
 		return 0;
 
 	global_orders = shmem_huge_global_enabled(inode, index, write_end,
@@ -1935,6 +1935,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
 
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 		orders = 0;
+	else if (!pgtable_has_pmd_leaves())
+		orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
 
 	if (orders > 0) {
 		suitable_orders = shmem_suitable_orders(inode, vmf,
-- 
2.53.0
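
[Illustration only, not part of the patch: a self-contained userspace
sketch of the masking rule the patch applies. The order values assume
x86-64 with 4 KiB base pages, and pgtable_has_pmd_leaves() is stubbed
to the unsupported case.]

	#include <stdbool.h>
	#include <stdio.h>

	#define PMD_ORDER 9		/* 2 MiB leaf */
	#define PUD_ORDER 18		/* 1 GiB leaf */
	#define BIT(n) (1UL << (n))

	/* Stand-in for the arch hook; the kernel queries the MMU. */
	static bool pgtable_has_pmd_leaves(void) { return false; }

	static unsigned long mask_unsupported_orders(unsigned long orders)
	{
		/* mTHP orders survive; only PMD/PUD-sized orders drop. */
		if (!pgtable_has_pmd_leaves())
			orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
		return orders;
	}

	int main(void)
	{
		unsigned long orders = BIT(2) | BIT(4) | BIT(PMD_ORDER);

		/* Prints 0x14: orders 2 and 4 remain, order 9 is gone. */
		printf("allowed orders: %#lx\n",
		       mask_unsupported_orders(orders));
		return 0;
	}
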
Re: [PATCH v2 10/11] mm: thp: always enable mTHP support
Posted by Baolin Wang 17 hours ago

On 2/10/26 6:14 AM, Luiz Capitulino wrote:
> [...]
> @@ -806,6 +809,9 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
>   	}
>   
>   	orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
> +	if (!pgtable_has_pmd_leaves())
> +		orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));

I think you should also handle the 'huge_anon_orders_inherit' setting in 
this function if pgtable_has_pmd_leaves() returns false. Shmem as well.

if (!anon_orders_configured)
	huge_anon_orders_inherit = BIT(PMD_ORDER);

> [...]
> @@ -1935,6 +1935,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>   
>   	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
>   		orders = 0;
> +	else if (!pgtable_has_pmd_leaves())
> +		orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));

Moving this check into shmem_allowable_huge_orders() would be more 
appropriate.
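
Something like the below, perhaps (untested sketch; 'mask' is only a
placeholder for whatever variable accumulates the allowed orders in
that function):

	if (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))
		return 0;

	/* No PMD leaves on this MMU, so PMD/PUD orders can never map */
	if (!pgtable_has_pmd_leaves())
		mask &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));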

Re: [PATCH v2 10/11] mm: thp: always enable mTHP support
Posted by Luiz Capitulino 14 hours ago
On 2026-02-10 04:56, Baolin Wang wrote:
> On 2/10/26 6:14 AM, Luiz Capitulino wrote:
>> [...]
>> @@ -806,6 +809,9 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
>>       }
>>       orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT;
>> +    if (!pgtable_has_pmd_leaves())
>> +        orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
> 
> I think you should also handle the 'huge_anon_orders_inherit' setting in this function if pgtable_has_pmd_leaves() returns false. Shmem as well.
> 
> if (!anon_orders_configured)
>      huge_anon_orders_inherit = BIT(PMD_ORDER);

Good catch. So, would you agree that we should set it to BIT(PMD_ORDER - 1)
in this case?

> 
>> [...]
>> @@ -1935,6 +1935,8 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>>       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
>>           orders = 0;
>> +    else if (!pgtable_has_pmd_leaves())
>> +        orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
> 
> Moving this check into shmem_allowable_huge_orders() would be more appropriate.

Will do.

Thanks a lot for the very fast review.

Re: [PATCH v2 10/11] mm: thp: always enable mTHP support
Posted by Baolin Wang 2 hours ago

On 2/10/26 9:28 PM, Luiz Capitulino wrote:
> On 2026-02-10 04:56, Baolin Wang wrote:
>>
>>
>> On 2/10/26 6:14 AM, Luiz Capitulino wrote:
>>> [...]
>>> +    if (!pgtable_has_pmd_leaves())
>>> +        orders &= ~(BIT(PMD_ORDER) | BIT(PUD_ORDER));
>>
>> I think you should also handle the 'huge_anon_orders_inherit' setting 
>> in this function if pgtable_has_pmd_leaves() returns false. Shmem as 
>> well.
>>
>> if (!anon_orders_configured)
>>      huge_anon_orders_inherit = BIT(PMD_ORDER);
> 
> Good catch. So, would you agree that we should set it to BIT(PMD_ORDER - 1)
> in this case?

From the documentation:
"
By default, PMD-sized hugepages have enabled="inherit" and all other
hugepage sizes have enabled="never".
"

So if pgtable_has_pmd_leaves() returns false, IMO, we should just skip 
setting the PMD-sized order for huge_anon_orders_inherit. What I mean is:

if (!anon_orders_configured && pgtable_has_pmd_leaves())
	huge_anon_orders_inherit = BIT(PMD_ORDER);
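
The shmem side could do the same where huge_shmem_orders_inherit gets
its default (untested sketch, assuming the default is assigned like
this in mm/shmem.c):

if (!huge_shmem_orders_inherit && pgtable_has_pmd_leaves())
	huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);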