Change hugepage_add_new_anon_rmap() to take in a folio directly. While
adding a folio variable to the four call sites, convert page functions
to their folio equivalents. This removes three callers of the Huge Page
macros which take in a page.
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
include/linux/rmap.h | 2 +-
mm/hugetlb.c | 90 +++++++++++++++++++++++++-------------------
mm/rmap.c | 6 +--
3 files changed, 55 insertions(+), 43 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index a6bd1f0a183d..a4570da03e58 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -203,7 +203,7 @@ void page_remove_rmap(struct page *, struct vm_area_struct *,
void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
-void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address);
static inline void __page_dup_rmap(struct page *page, bool compound)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c37a26c8392c..6f25055c3ba5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4950,7 +4950,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
struct folio *new_folio)
{
__folio_mark_uptodate(new_folio);
- hugepage_add_new_anon_rmap(&new_folio->page, vma, addr);
+ hugepage_add_new_anon_rmap(new_folio, vma, addr);
set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
@@ -5479,6 +5479,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
+ struct folio *new_folio = NULL;
int outside_reserve = 0;
vm_fault_t ret = 0;
unsigned long haddr = address & huge_page_mask(h);
@@ -5590,6 +5591,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_release_old;
}
+ if (new_page)
+ new_folio = page_folio(new_page);
+
/*
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
@@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_release_all;
}
- copy_user_huge_page(new_page, old_page, address, vma,
+ copy_user_huge_page(&new_folio->page, old_page, address, vma,
pages_per_huge_page(h));
- __SetPageUptodate(new_page);
+ __folio_mark_uptodate(new_folio);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
haddr + huge_page_size(h));
@@ -5618,10 +5622,10 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
page_remove_rmap(old_page, vma, true);
- hugepage_add_new_anon_rmap(new_page, vma, haddr);
+ hugepage_add_new_anon_rmap(new_folio, vma, haddr);
set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, new_page, !unshare));
- SetHPageMigratable(new_page);
+ make_huge_pte(vma, &new_folio->page, !unshare));
+ folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_page = old_page;
}
@@ -5633,8 +5637,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
* unshare)
*/
if (new_page != old_page)
- restore_reserve_on_error(h, vma, haddr, new_page);
- put_page(new_page);
+ restore_reserve_on_error(h, vma, haddr, &new_folio->page);
+ folio_put(new_folio);
out_release_old:
put_page(old_page);
@@ -5756,6 +5760,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
int anon_rmap = 0;
unsigned long size;
struct page *page;
+ struct folio *folio = NULL;
pte_t new_pte;
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
@@ -5833,12 +5838,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
ret = 0;
goto out;
}
- clear_huge_page(page, address, pages_per_huge_page(h));
- __SetPageUptodate(page);
+
+ if (page)
+ folio = page_folio(page);
+
+ clear_huge_page(&folio->page, address, pages_per_huge_page(h));
+ __folio_mark_uptodate(folio);
new_page = true;
if (vma->vm_flags & VM_MAYSHARE) {
- int err = hugetlb_add_to_page_cache(page, mapping, idx);
+ int err = hugetlb_add_to_page_cache(&folio->page, mapping, idx);
if (err) {
/*
* err can't be -EEXIST which implies someone
@@ -5847,13 +5856,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* to the page cache. So it's safe to call
* restore_reserve_on_error() here.
*/
- restore_reserve_on_error(h, vma, haddr, page);
- put_page(page);
+ restore_reserve_on_error(h, vma, haddr, &folio->page);
+ folio_put(folio);
goto out;
}
new_pagecache_page = true;
} else {
- lock_page(page);
+ folio_lock(folio);
if (unlikely(anon_vma_prepare(vma))) {
ret = VM_FAULT_OOM;
goto backout_unlocked;
@@ -5861,12 +5870,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
anon_rmap = 1;
}
} else {
+ folio = page_folio(page);
/*
* If memory error occurs between mmap() and fault, some process
* don't have hwpoisoned swap entry for errored virtual address.
* So we need to block hugepage fault by PG_hwpoison bit check.
*/
- if (unlikely(PageHWPoison(page))) {
+ if (unlikely(folio_test_hwpoison(folio))) {
ret = VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
goto backout_unlocked;
@@ -5874,8 +5884,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
/* Check for page in userfault range. */
if (userfaultfd_minor(vma)) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
/* See comment in userfaultfd_missing() block above */
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
ret = 0;
@@ -5909,10 +5919,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto backout;
if (anon_rmap)
- hugepage_add_new_anon_rmap(page, vma, haddr);
+ hugepage_add_new_anon_rmap(folio, vma, haddr);
else
- page_dup_file_rmap(page, true);
- new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
+ page_dup_file_rmap(&folio->page, true);
+ new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
/*
* If this pte was previously wr-protected, keep it wr-protected even
@@ -5925,7 +5935,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(mm, vma, address, ptep, flags, page, ptl);
+ ret = hugetlb_wp(mm, vma, address, ptep, flags, &folio->page, ptl);
}
spin_unlock(ptl);
@@ -5936,9 +5946,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* been isolated for migration.
*/
if (new_page)
- SetHPageMigratable(page);
+ folio_set_hugetlb_migratable(folio);
- unlock_page(page);
+ folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -5948,10 +5958,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl);
backout_unlocked:
if (new_page && !new_pagecache_page)
- restore_reserve_on_error(h, vma, haddr, page);
+ restore_reserve_on_error(h, vma, haddr, &folio->page);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto out;
}
@@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
spinlock_t *ptl;
int ret = -ENOMEM;
struct page *page;
+ struct folio *folio = NULL;
int writable;
bool page_in_pagecache = false;
@@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
*pagep = NULL;
}
+ if (page)
+ folio = page_folio(page);
+
/*
- * The memory barrier inside __SetPageUptodate makes sure that
+ * The memory barrier inside __folio_mark_uptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
- __SetPageUptodate(page);
+ __folio_mark_uptodate(folio);
/* Add shared, newly allocated pages to the page cache. */
if (vm_shared && !is_continue) {
@@ -6271,7 +6285,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
* hugetlb_fault_mutex_table that here must be hold by
* the caller.
*/
- ret = hugetlb_add_to_page_cache(page, mapping, idx);
+ ret = hugetlb_add_to_page_cache(&folio->page, mapping, idx);
if (ret)
goto out_release_nounlock;
page_in_pagecache = true;
@@ -6280,7 +6294,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
ret = -EIO;
- if (PageHWPoison(page))
+ if (folio_test_hwpoison(folio))
goto out_release_unlock;
/*
@@ -6293,9 +6307,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
goto out_release_unlock;
if (page_in_pagecache)
- page_dup_file_rmap(page, true);
+ page_dup_file_rmap(&folio->page, true);
else
- hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
+ hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);
/*
* For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@ -6306,7 +6320,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
else
writable = dst_vma->vm_flags & VM_WRITE;
- _dst_pte = make_huge_pte(dst_vma, page, writable);
+ _dst_pte = make_huge_pte(dst_vma, &folio->page, writable);
/*
* Always mark UFFDIO_COPY page dirty; note that this may not be
* extremely important for hugetlbfs for now since swapping is not
@@ -6328,20 +6342,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
spin_unlock(ptl);
if (!is_continue)
- SetHPageMigratable(page);
+ folio_set_hugetlb_migratable(folio);
if (vm_shared || is_continue)
- unlock_page(page);
+ folio_unlock(folio);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
if (vm_shared || is_continue)
- unlock_page(page);
+ folio_unlock(folio);
out_release_nounlock:
if (!page_in_pagecache)
- restore_reserve_on_error(h, dst_vma, dst_addr, page);
- put_page(page);
+ restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page);
+ folio_put(folio);
goto out;
}
#endif /* CONFIG_USERFAULTFD */
diff --git a/mm/rmap.c b/mm/rmap.c
index 948ca17a96ad..e6d94bd19879 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2547,15 +2547,13 @@ void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
!!(flags & RMAP_EXCLUSIVE));
}
-void hugepage_add_new_anon_rmap(struct page *page,
+void hugepage_add_new_anon_rmap(struct folio *folio,
struct vm_area_struct *vma, unsigned long address)
{
- struct folio *folio = page_folio(page);
-
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
- __page_set_anon_rmap(folio, page, vma, address, 1);
+ __page_set_anon_rmap(folio, &folio->page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */
--
2.39.0
On Thu, Jan 19, 2023 at 01:14:41PM -0800, Sidhartha Kumar wrote: > @@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, > goto out_release_all; > } > > - copy_user_huge_page(new_page, old_page, address, vma, > + copy_user_huge_page(&new_folio->page, old_page, address, vma, > pages_per_huge_page(h)); We have a folio_copy(), but it feels to me like we need a folio_copy_user() so that we can use copy_user_page() on machines with virtual caches. > @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, > spinlock_t *ptl; > int ret = -ENOMEM; > struct page *page; > + struct folio *folio = NULL; > int writable; > bool page_in_pagecache = false; > > @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, > *pagep = NULL; > } > > + if (page) > + folio = page_folio(page); > + > /* > - * The memory barrier inside __SetPageUptodate makes sure that > + * The memory barrier inside __folio_mark_uptodate makes sure that > * preceding stores to the page contents become visible before > * the set_pte_at() write. > */ > - __SetPageUptodate(page); > + __folio_mark_uptodate(folio); I suggest that "page" can never be NULL or __SetPageUptodate() would have crashed.
On 1/19/23 10:00 PM, Matthew Wilcox wrote: > On Thu, Jan 19, 2023 at 01:14:41PM -0800, Sidhartha Kumar wrote: >> @@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, >> goto out_release_all; >> } >> >> - copy_user_huge_page(new_page, old_page, address, vma, >> + copy_user_huge_page(&new_folio->page, old_page, address, vma, >> pages_per_huge_page(h)); > > We have a folio_copy(), but it feels to me like we need a > folio_copy_user() so that we can use copy_user_page() on machines > with virtual caches. > >> @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, >> spinlock_t *ptl; >> int ret = -ENOMEM; >> struct page *page; >> + struct folio *folio = NULL; >> int writable; >> bool page_in_pagecache = false; >> >> @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, >> *pagep = NULL; >> } >> >> + if (page) >> + folio = page_folio(page); >> + >> /* >> - * The memory barrier inside __SetPageUptodate makes sure that >> + * The memory barrier inside __folio_mark_uptodate makes sure that >> * preceding stores to the page contents become visible before >> * the set_pte_at() write. >> */ >> - __SetPageUptodate(page); >> + __folio_mark_uptodate(folio); > Hi Matthew, In the snippet: page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; } copy_user_huge_page(page, *pagep, dst_addr, dst_vma, pages_per_huge_page(h)); I thought the IS_ERR() call does not handle the NULL case and is a check for high memory addresses, and copy_user_huge_page() path does not seem to handle the NULL case as well but alloc_huge_page() can possibly return NULL so I was unsure about how to handle the folio conversion. > I suggest that "page" can never be NULL or __SetPageUptodate() would > have crashed. >
On Fri, Jan 20, 2023 at 12:45:38PM -0800, Sidhartha Kumar wrote: > > > @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, > > > spinlock_t *ptl; > > > int ret = -ENOMEM; > > > struct page *page; > > > + struct folio *folio = NULL; > > > int writable; > > > bool page_in_pagecache = false; > > > @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, > > > *pagep = NULL; > > > } > > > + if (page) > > > + folio = page_folio(page); > > > + > > > /* > > > - * The memory barrier inside __SetPageUptodate makes sure that > > > + * The memory barrier inside __folio_mark_uptodate makes sure that > > > * preceding stores to the page contents become visible before > > > * the set_pte_at() write. > > > */ > > > - __SetPageUptodate(page); > > > + __folio_mark_uptodate(folio); > > > > Hi Matthew, > > In the snippet: > > page = alloc_huge_page(dst_vma, dst_addr, 0); > if (IS_ERR(page)) { > put_page(*pagep); > ret = -ENOMEM; > *pagep = NULL; > goto out; > } > copy_user_huge_page(page, *pagep, dst_addr, dst_vma, > pages_per_huge_page(h)); > > I thought the IS_ERR() call does not handle the NULL case and is a check for > high memory addresses, and copy_user_huge_page() path does not seem to > handle the NULL case as well but alloc_huge_page() can possibly return NULL > so I was unsure about how to handle the folio conversion. I'm not sure how alloc_huge_page() can return NULL. It seems like it returns ERR_PTR(-ENOSPC) or ERR_PTR(-ENOMEM) if it cannot allocate memory?
On 1/20/23 1:17 PM, Matthew Wilcox wrote: > On Fri, Jan 20, 2023 at 12:45:38PM -0800, Sidhartha Kumar wrote: >>>> @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, >>>> spinlock_t *ptl; >>>> int ret = -ENOMEM; >>>> struct page *page; >>>> + struct folio *folio = NULL; >>>> int writable; >>>> bool page_in_pagecache = false; >>>> @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, >>>> *pagep = NULL; >>>> } >>>> + if (page) >>>> + folio = page_folio(page); >>>> + >>>> /* >>>> - * The memory barrier inside __SetPageUptodate makes sure that >>>> + * The memory barrier inside __folio_mark_uptodate makes sure that >>>> * preceding stores to the page contents become visible before >>>> * the set_pte_at() write. >>>> */ >>>> - __SetPageUptodate(page); >>>> + __folio_mark_uptodate(folio); >>> >> >> Hi Matthew, >> >> In the snippet: >> >> page = alloc_huge_page(dst_vma, dst_addr, 0); >> if (IS_ERR(page)) { >> put_page(*pagep); >> ret = -ENOMEM; >> *pagep = NULL; >> goto out; >> } >> copy_user_huge_page(page, *pagep, dst_addr, dst_vma, >> pages_per_huge_page(h)); >> >> I thought the IS_ERR() call does not handle the NULL case and is a check for >> high memory addresses, and copy_user_huge_page() path does not seem to >> handle the NULL case as well but alloc_huge_page() can possibly return NULL >> so I was unsure about how to handle the folio conversion. > > I'm not sure how alloc_huge_page() can return NULL. It seems like it > returns ERR_PTR(-ENOSPC) or ERR_PTR(-ENOMEM) if it cannot allocate memory? > I see now, I agree that page cannot be NULL at the return from alloc_huge_page, I will make that change in v2. Thanks, Sidhartha Kumar
© 2016 - 2025 Red Hat, Inc.