Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
replace it with a PMD-mapped THP. Remember to flush the TLB entry
corresponding to the hugezeropage. In case of failure, fall back to
splitting the PMD.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
mm/huge_memory.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bdbf67c18f6c..fbb195bc2038 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1790,6 +1790,41 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
spin_unlock(vmf->ptl);
}
+static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf)
+{
+ unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
+ struct mmu_notifier_range range;
+ struct folio *folio;
+ vm_fault_t ret = 0;
+
+ folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
+ if (unlikely(!folio)) {
+ ret = VM_FAULT_FALLBACK;
+ goto out;
+ }
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, haddr,
+ haddr + HPAGE_PMD_SIZE);
+ mmu_notifier_invalidate_range_start(&range);
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd)))
+ goto release;
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret)
+ goto release;
+ (void)pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
+ map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
+ goto unlock;
+release:
+ folio_put(folio);
+unlock:
+ spin_unlock(vmf->ptl);
+ mmu_notifier_invalidate_range_end(&range);
+out:
+ return ret;
+}
+
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
{
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
@@ -1802,8 +1837,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
VM_BUG_ON_VMA(!vma->anon_vma, vma);
- if (is_huge_zero_pmd(orig_pmd))
+ if (is_huge_zero_pmd(orig_pmd)) {
+ vm_fault_t ret = do_huge_zero_wp_pmd(vmf);
+
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+
+ /* Fallback to splitting PMD if THP cannot be allocated */
goto fallback;
+ }
spin_lock(vmf->ptl);
--
2.30.2
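
For context, a rough userspace sketch of how this path can be exercised (an illustration only, not part of the patch; it assumes THP is enabled for the mapping and /sys/kernel/mm/transparent_hugepage/use_zero_page is set to 1): a read fault on a fresh 2M-aligned anonymous region maps the huge zero page at PMD level, and the subsequent write takes the wp-fault that do_huge_zero_wp_pmd() handles above.

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>

#define SZ_2M (2UL << 20)

int main(void)
{
	/* Over-allocate so a 2M-aligned address exists inside the mapping. */
	char *buf = mmap(NULL, 2 * SZ_2M, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	char *p = (char *)(((uintptr_t)buf + SZ_2M - 1) & ~(SZ_2M - 1));
	madvise(p, SZ_2M, MADV_HUGEPAGE);

	volatile char c = p[0];	/* read fault: PMD maps the huge zero page */
	(void)c;
	p[0] = 1;		/* write fault: do_huge_pmd_wp_page() -> do_huge_zero_wp_pmd() */

	printf("wrote to %p\n", (void *)p);
	return 0;
}
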
On 2024/9/24 18:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush the TLB entry
> corresponding to the hugezeropage. In case of failure, fall back to
> splitting the PMD.
[...]
> +	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
> +	if (unlikely(!folio)) {
> +		ret = VM_FAULT_FALLBACK;

I'd like to return VM_FAULT_FALLBACK if you re-post.

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
On 9/24/24 18:43, Kefeng Wang wrote:
> On 2024/9/24 18:16, Dev Jain wrote:
>> +	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
>> +	if (unlikely(!folio)) {
>> +		ret = VM_FAULT_FALLBACK;
>
> I'd like to return VM_FAULT_FALLBACK if you re-post.

That's cleaner. I need to keep my "goto" obsession in control...

> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Thanks!
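
For reference, a minimal sketch of what the suggested change would amount to in the allocation-failure path (an assumption on the shape of the re-posted patch, which may differ); the out: label and its goto then become unnecessary:

	folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
	if (unlikely(!folio))
		return VM_FAULT_FALLBACK;	/* caller falls back to splitting the PMD */
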
On 24.09.24 12:16, Dev Jain wrote:
> Introduce do_huge_zero_wp_pmd() to handle a wp-fault on a hugezeropage and
> replace it with a PMD-mapped THP. Remember to flush the TLB entry
> corresponding to the hugezeropage. In case of failure, fall back to
> splitting the PMD.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

Nothing jumped at me and it looks much cleaner now.

Acked-by: David Hildenbrand <david@redhat.com>

--
Cheers,

David / dhildenb