Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
replace it with a PMD-mapped THP. Remember to flush the TLB entry
corresponding to the hugezeropage. In case of failure, fall back
to splitting the PMD.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bdbf67c18f6c..fbb195bc2038 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
spin_unlock(vmf->ptl);
}

+static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
+{
+ unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
+ struct mmu_notifier_range range;
+ struct folio *folio;
+ vm_fault_t ret = 0;
+
+ folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
+ if (unlikely(!folio)) {
+ ret = VM_FAULT_FALLBACK;
+ goto out;
+ }
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
+ haddr + HPAGE_PMD_SIZE);
+ mmu_notifier_invalidate_range_start(&range);
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
+ goto release;
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret)
+ goto release;
+ (void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
+ map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
+ goto unlock;
+release:
+ folio_put(folio);
+unlock:
+ spin_unlock(vmf->ptl);
+ mmu_notifier_invalidate_range_end(&range);
+out:
+ return ret;
+}
+
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
{
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
@@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
VM_BUG_ON_VMA(!vma->anon_vma, vma);

- if (is_huge_zero_pmd(orig_pmd))
+ if (is_huge_zero_pmd(orig_pmd)) {
+ vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
+
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+
+ /* Fall back to splitting the PMD if a THP cannot be allocated */
goto fallback;
+ }

spin_lock(vmf->ptl);
--
2.30.2
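
For reference, a minimal userspace sketch that should exercise the new path, assuming a 2M PMD size, THP enabled (/sys/kernel/mm/transparent_hugepage/enabled set to madvise or always) and use_zero_page active: the read fault populates the PMD with the huge zero page, and the subsequent write takes the wp-fault now handled by do_huge_zero_wp_pmd().

#include <stdio.h>
#include <sys/mman.h>

#define PMD_SZ	(2UL << 20)	/* assumes 2M PMD-sized THP */

int main(void)
{
	/* Over-allocate so a PMD-aligned region can be carved out. */
	char *buf = mmap(NULL, 2 * PMD_SZ, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	char *p;

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p = (char *)(((unsigned long)buf + PMD_SZ - 1) & ~(PMD_SZ - 1));
	madvise(p, PMD_SZ, MADV_HUGEPAGE);

	/* Read fault: the kernel may map the huge zero page here. */
	if (p[0] != 0)
		return 1;

	/*
	 * Write fault on the huge zero page: with this patch the zero
	 * PMD is replaced by a freshly allocated THP instead of being
	 * split to PTEs.
	 */
	p[0] = 1;
	return 0;
}
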
On 2024/9/24 18:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush the TLB entry
> corresponding to the hugezeropage. In case of failure, fall back
> to splitting the PMD.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index bdbf67c18f6c..fbb195bc2038 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
> spin_unlock(vmf->ptl);
> }
>
> +static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
> +{
> + unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> + struct mmu_notifier_range range;
> + struct folio *folio;
> + vm_fault_t ret = 0;
> +
> + folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
> + if (unlikely(!folio)) {
> + ret = VM_FAULT_FALLBACK;
I'd like to return VM_FAULT_FALLBACK if you re-post.

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> + goto out;
> + }
> +
> + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
> + haddr + HPAGE_PMD_SIZE);
> + mmu_notifier_invalidate_range_start(&range);
> + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
> + if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
> + goto release;
> + ret = check_stable_address_space(vma->vm_mm);
> + if (ret)
> + goto release;
> + (void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
> + map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
> + goto unlock;
> +release:
> + folio_put(folio);
> +unlock:
> + spin_unlock(vmf->ptl);
> + mmu_notifier_invalidate_range_end(&range);
> +out:
> + return ret;
> +}
> +
> vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
> {
> const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
> @@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
> vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
> VM_BUG_ON_VMA(!vma->anon_vma, vma);
>
> - if (is_huge_zero_pmd(orig_pmd))
> + if (is_huge_zero_pmd(orig_pmd)) {
> + vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
> +
> + if (!(ret & VM_FAULT_FALLBACK))
> + return ret;
> +
> + /* Fall back to splitting the PMD if a THP cannot be allocated */
> goto fallback;
> + }
>
> spin_lock(vmf->ptl);
>
On 9/24/24 18:43, Kefeng Wang wrote:
>
>
> On 2024/9/24 18:16, Dev Jain wrote:
>> Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
>> replace it with a PMD-mapped THP. Remember to flush the TLB entry
>> corresponding to the hugezeropage. In case of failure, fall back
>> to splitting the PMD.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> ---
>> mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 43 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index bdbf67c18f6c..fbb195bc2038 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
>> spin_unlock(vmf->ptl);
>> }
>> +static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
>> +{
>> + unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
>> + struct vm_area_struct *vma = vmf->vma;
>> + struct mmu_notifier_range range;
>> + struct folio *folio;
>> + vm_fault_t ret = 0;
>> +
>> + folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
>> + if (unlikely(!folio)) {
>> + ret = VM_FAULT_FALLBACK;
>
> I'd like to return VM_FAULT_FALLBACK if you re-post.
That's cleaner. I need to keep my "goto" obsession under control...
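
The reworked allocation-failure path might then look like this (a sketch of the suggested cleanup, not the actual v2):

	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
	if (unlikely(!folio))
		return VM_FAULT_FALLBACK;

which also lets the out: label go away, since every remaining exit runs through the unlock/invalidate path.
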
>
> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Thanks!
>
>> + goto out;
>> + }
>> +
>> + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
>> + haddr + HPAGE_PMD_SIZE);
>> + mmu_notifier_invalidate_range_start(&range);
>> + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
>> + if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
>> + goto release;
>> + ret = check_stable_address_space(vma->vm_mm);
>> + if (ret)
>> + goto release;
>> + (void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
>> + map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
>> + goto unlock;
>> +release:
>> + folio_put(folio);
>> +unlock:
>> + spin_unlock(vmf->ptl);
>> + mmu_notifier_invalidate_range_end(&range);
>> +out:
>> + return ret;
>> +}
>> +
>> vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>> {
>> const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
>> @@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
>> vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
>> VM_BUG_ON_VMA(!vma->anon_vma, vma);
>> - if (is_huge_zero_pmd(orig_pmd))
>> + if (is_huge_zero_pmd(orig_pmd)) {
>> + vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
>> +
>> + if (!(ret & VM_FAULT_FALLBACK))
>> + return ret;
>> +
>> + /* Fall back to splitting the PMD if a THP cannot be allocated */
>> goto fallback;
>> + }
>> spin_lock(vmf->ptl);
>
>
On 24.09.24 12:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush the TLB entry
> corresponding to the hugezeropage. In case of failure, fall back
> to splitting the PMD.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

Nothing jumped at me and it looks much cleaner now.

Acked-by: David Hildenbrand <david@redhat.com>

--
Cheers,

David / dhildenb