In preparation for supporting try_to_unmap_one() unmapping PMD-mapped
folios, start the pagewalk first; once the walk hits a PMD-mapped THP,
split it under the PMD lock the walk already holds via the new
split_huge_pmd_locked(), instead of calling split_huge_pmd_address()
up front.
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
---
 include/linux/huge_mm.h |  6 ++++++
 include/linux/rmap.h    | 24 +++++++++++++++++++++++
 mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
 mm/rmap.c               | 21 +++++++++++++++------
 4 files changed, 67 insertions(+), 26 deletions(-)
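
For reviewers, the shape of the change in try_to_unmap_one() boils down
to the sketch below (simplified pseudo-kernel-C; error paths and the
other TTU flags are omitted, and only names from this patch are used):

	/* Before: split eagerly, before the pagewalk even starts. */
	if (flags & TTU_SPLIT_HUGE_PMD)
		split_huge_pmd_address(vma, address, false, folio);
	while (page_vma_mapped_walk(&pvmw)) {
		/* ... only PTE mappings are expected here ... */
	}

	/* After: walk first; split lazily if the walk maps a PMD. */
	while (page_vma_mapped_walk(&pvmw)) {
		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
			split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
					      false, folio);
			flags &= ~TTU_SPLIT_HUGE_PMD;
			page_vma_mapped_walk_restart(&pvmw);
			continue;
		}
		/* ... PTE-mapped from here on ... */
	}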
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7ad41de5eaea..9f720b0731c4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -428,6 +428,9 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmd, bool freeze, struct folio *folio);
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -490,6 +493,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd,
+ bool freeze, struct folio *folio) {}
#define split_huge_pud(__vma, __pmd, __address) \
do { } while (0)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0fd9bebce54c..d1c5e2d694b2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -703,6 +703,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
spin_unlock(pvmw->ptl);
}
+/**
+ * page_vma_mapped_walk_restart - Restart the page table walk.
+ * @pvmw: Pointer to struct page_vma_mapped_walk.
+ *
+ * Restart the page table walk after a change to the page table, such
+ * as splitting a PMD. Releases the PTL taken during the previous walk
+ * and resets the state so that a new walk starts at the current
+ * address stored in pvmw->address.
+ */
+static inline void
+page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
+{
+ WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);
+
+ if (likely(pvmw->ptl))
+ spin_unlock(pvmw->ptl);
+ else
+ WARN_ON_ONCE(1);
+
+ pvmw->ptl = NULL;
+ pvmw->pmd = NULL;
+ pvmw->pte = NULL;
+}
+
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
/*
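
Note the contrast with page_vma_mapped_walk_done() just above: _done()
unlocks and ends the walk, while _restart() unlocks but keeps the walk
alive so the next iteration re-maps the same pvmw->address. A hedged
sketch of the two exits (done_early and layout_changed are illustrative
placeholders, not real predicates):

	while (page_vma_mapped_walk(&pvmw)) {
		if (done_early) {
			/* Finished with this VMA: unlock and stop. */
			page_vma_mapped_walk_done(&pvmw);
			break;
		}
		if (layout_changed) {
			/*
			 * The page table changed under us (e.g. a PMD
			 * was split): unlock, forget pmd/pte, and
			 * re-walk the same address next iteration.
			 */
			page_vma_mapped_walk_restart(&pvmw);
			continue;
		}
		/* normal processing of pvmw.pte */
	}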
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 70d20fefc6db..e766d3f3a302 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2582,6 +2582,27 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pmd_populate(mm, pmd, pgtable);
}
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmd, bool freeze, struct folio *folio)
+{
+ VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
+ VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
+ VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
+ VM_BUG_ON(freeze && !folio);
+
+ /*
+ * When the caller requests to set up a migration entry, we
+ * require a folio to check the PMD against. Otherwise, there
+ * is a risk of replacing the wrong folio.
+ */
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
+ is_pmd_migration_entry(*pmd)) {
+ if (folio && folio != pmd_folio(*pmd))
+ return;
+ __split_huge_pmd_locked(vma, pmd, address, freeze);
+ }
+}
+
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze, struct folio *folio)
{
@@ -2593,26 +2614,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
mmu_notifier_invalidate_range_start(&range);
ptl = pmd_lock(vma->vm_mm, pmd);
-
- /*
- * If caller asks to setup a migration entry, we need a folio to check
- * pmd against. Otherwise we can end up replacing wrong folio.
- */
- VM_BUG_ON(freeze && !folio);
- VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
-
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
- /*
- * It's safe to call pmd_page when folio is set because it's
- * guaranteed that pmd is present.
- */
- if (folio && folio != pmd_folio(*pmd))
- goto out;
- __split_huge_pmd_locked(vma, pmd, range.start, freeze);
- }
-
-out:
+ split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(&range);
}
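
This factoring leaves two entry points for the split: __split_huge_pmd()
for callers that hold no locks (it takes the PMD lock and brackets the
split with MMU-notifier invalidation itself), and split_huge_pmd_locked()
for callers already under pmd_lock(). A minimal sketch of the latter
case, assuming the caller has also set up the MMU-notifier invalidation
range beforehand, as try_to_unmap_one() does:

	spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);

	/*
	 * address must be HPAGE_PMD_SIZE-aligned and the folio, if
	 * passed, must be locked -- see the VM_WARN_ON_ONCE()s above.
	 */
	split_huge_pmd_locked(vma, address & HPAGE_PMD_MASK, pmd,
			      false, folio);
	spin_unlock(ptl);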
diff --git a/mm/rmap.c b/mm/rmap.c
index 2d778725e4f5..dacf24bc82f0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1642,9 +1642,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (flags & TTU_SYNC)
pvmw.flags = PVMW_SYNC;
- if (flags & TTU_SPLIT_HUGE_PMD)
- split_huge_pmd_address(vma, address, false, folio);
-
/*
* For THP, we have to assume the worse case ie pmd for invalidation.
* For hugetlb, it could be much worse if we need to do pud
@@ -1670,9 +1667,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
- /* Unexpected PMD-mapped THP? */
- VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
/*
* If the folio is in an mlock()d vma, we must not swap it out.
*/
@@ -1684,6 +1678,21 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
goto walk_abort;
}
+ if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
+ /*
+ * We temporarily have to drop the PTL and start once
+ * again from that now-PTE-mapped page table.
+ */
+ split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+ false, folio);
+ flags &= ~TTU_SPLIT_HUGE_PMD;
+ page_vma_mapped_walk_restart(&pvmw);
+ continue;
+ }
+
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
pfn = pte_pfn(ptep_get(pvmw.pte));
subpage = folio_page(folio, pfn - folio_pfn(folio));
address = pvmw.address;
--
2.33.1
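
Callers are unaffected by the reordering: they still request the split
via TTU_SPLIT_HUGE_PMD, and the walk now performs it on demand. For
example (a sketch only; the exact flag combination depends on the
caller):

	struct folio *folio = page_folio(page);

	folio_lock(folio);
	/* Unmap everywhere, splitting any PMD mapping during the walk. */
	try_to_unmap(folio, TTU_SPLIT_HUGE_PMD | TTU_SYNC);
	folio_unlock(folio);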
On 13 Jun 2024, at 21:51, Lance Yang wrote:

> In preparation for supporting try_to_unmap_one() unmapping PMD-mapped
> folios, start the pagewalk first; once the walk hits a PMD-mapped THP,
> split it under the PMD lock the walk already holds via the new
> split_huge_pmd_locked(), instead of calling split_huge_pmd_address()
> up front.
[...]

Thanks.

Acked-by: Zi Yan <ziy@nvidia.com>

--
Best Regards,
Yan, Zi
On Fri, Jun 14, 2024 at 10:26 PM Zi Yan <ziy@nvidia.com> wrote:
>
> On 13 Jun 2024, at 21:51, Lance Yang wrote:
> [...]
>
> Thanks.
>
> Acked-by: Zi Yan <ziy@nvidia.com>

Thanks for taking the time to review!
Lance

> --
> Best Regards,
> Yan, Zi
On 14.06.24 03:51, Lance Yang wrote:
> In preparation for supporting try_to_unmap_one() unmapping PMD-mapped
> folios, start the pagewalk first; once the walk hits a PMD-mapped THP,
> split it under the PMD lock the walk already holds via the new
> split_huge_pmd_locked(), instead of calling split_huge_pmd_address()
> up front.
[...]

Would have converted that VM_BUG_ON to a VM_WARN_ON_ONCE, but it's just
moving code, so no big deal.

Thanks!

Acked-by: David Hildenbrand <david@redhat.com>

--
Cheers,

David / dhildenb
On Fri, Jun 14, 2024 at 3:34 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 14.06.24 03:51, Lance Yang wrote:
> [...]
>
> Would have converted that VM_BUG_ON to a VM_WARN_ON_ONCE, but it's just
> moving code, so no big deal.

OK, let's leave it as is for now ;)

> Thanks!
>
> Acked-by: David Hildenbrand <david@redhat.com>

Thanks for taking the time to review!
Lance