hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
This alleviates the stack by consolidating 5 variables into a single
struct.
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
mm/hugetlb.c | 61 ++++++++++++++++++++++++++--------------------------
1 file changed, 30 insertions(+), 31 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aca2f11b4138..d4f26947173e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5918,18 +5918,16 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* Keep the pte_same checks anyway to make transition from the mutex easier.
*/
static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep, unsigned int flags,
- struct folio *pagecache_folio, spinlock_t *ptl,
+ struct folio *pagecache_folio,
struct vm_fault *vmf)
{
- const bool unshare = flags & FAULT_FLAG_UNSHARE;
- pte_t pte = huge_ptep_get(ptep);
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
+ pte_t pte = huge_ptep_get(vmf->pte);
struct hstate *h = hstate_vma(vma);
struct folio *old_folio;
struct folio *new_folio;
int outside_reserve = 0;
vm_fault_t ret = 0;
- unsigned long haddr = address & huge_page_mask(h);
struct mmu_notifier_range range;
/*
@@ -5952,7 +5950,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
/* Let's take out MAP_SHARED mappings first. */
if (vma->vm_flags & VM_MAYSHARE) {
- set_huge_ptep_writable(vma, haddr, ptep);
+ set_huge_ptep_writable(vma, vmf->address, vmf->pte);
return 0;
}
@@ -5971,7 +5969,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
SetPageAnonExclusive(&old_folio->page);
}
if (likely(!unshare))
- set_huge_ptep_writable(vma, haddr, ptep);
+ set_huge_ptep_writable(vma, vmf->address, vmf->pte);
delayacct_wpcopy_end();
return 0;
@@ -5998,8 +5996,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
* Drop page table lock as buddy allocator may be called. It will
* be acquired again before returning to the caller, as expected.
*/
- spin_unlock(ptl);
- new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve);
+ spin_unlock(vmf->ptl);
+ new_folio = alloc_hugetlb_folio(vma, vmf->address, outside_reserve);
if (IS_ERR(new_folio)) {
/*
@@ -6024,19 +6022,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
*
* Reacquire both after unmap operation.
*/
- idx = vma_hugecache_offset(h, vma, haddr);
+ idx = vma_hugecache_offset(h, vma, vmf->address);
hash = hugetlb_fault_mutex_hash(mapping, idx);
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- unmap_ref_private(mm, vma, &old_folio->page, haddr);
+ unmap_ref_private(mm, vma, &old_folio->page,
+ vmf->address);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vma_lock_read(vma);
- spin_lock(ptl);
- ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
- if (likely(ptep &&
- pte_same(huge_ptep_get(ptep), pte)))
+ spin_lock(vmf->ptl);
+ vmf->pte = hugetlb_walk(vma, vmf->address,
+ huge_page_size(h));
+ if (likely(vmf->pte &&
+ pte_same(huge_ptep_get(vmf->pte), pte)))
goto retry_avoidcopy;
/*
* race occurs while re-acquiring page table
@@ -6058,37 +6058,38 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(ret))
goto out_release_all;
- if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
+ if (copy_user_large_folio(new_folio, old_folio, vmf->real_address, vma)) {
ret = VM_FAULT_HWPOISON_LARGE;
goto out_release_all;
}
__folio_mark_uptodate(new_folio);
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
- haddr + huge_page_size(h));
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address,
+ vmf->address + huge_page_size(h));
mmu_notifier_invalidate_range_start(&range);
/*
* Retake the page table lock to check for racing updates
* before the page tables are altered
*/
- spin_lock(ptl);
- ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
- if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+ spin_lock(vmf->ptl);
+ vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
+ if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
/* Break COW or unshare */
- huge_ptep_clear_flush(vma, haddr, ptep);
+ huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
hugetlb_remove_rmap(old_folio);
- hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
+ hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address);
if (huge_pte_uffd_wp(pte))
newpte = huge_pte_mkuffd_wp(newpte);
- set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
+ set_huge_pte_at(mm, vmf->address, vmf->pte, newpte,
+ huge_page_size(h));
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = old_folio;
}
- spin_unlock(ptl);
+ spin_unlock(vmf->ptl);
mmu_notifier_invalidate_range_end(&range);
out_release_all:
/*
@@ -6096,12 +6097,12 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
* unshare)
*/
if (new_folio != old_folio)
- restore_reserve_on_error(h, vma, haddr, new_folio);
+ restore_reserve_on_error(h, vma, vmf->address, new_folio);
folio_put(new_folio);
out_release_old:
folio_put(old_folio);
- spin_lock(ptl); /* Caller expects lock to be held */
+ spin_lock(vmf->ptl); /* Caller expects lock to be held */
delayacct_wpcopy_end();
return ret;
@@ -6365,8 +6366,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(mm, vma, vmf->real_address, vmf->pte,
- vmf->flags, folio, vmf->ptl, vmf);
+ ret = hugetlb_wp(mm, vma, folio, vmf);
}
spin_unlock(vmf->ptl);
@@ -6579,8 +6579,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
if (!huge_pte_write(vmf.orig_pte)) {
- ret = hugetlb_wp(mm, vma, address, vmf.pte, flags,
- pagecache_folio, vmf.ptl, &vmf);
+ ret = hugetlb_wp(mm, vma, pagecache_folio, &vmf);
goto out_put_page;
} else if (likely(flags & FAULT_FLAG_WRITE)) {
vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
--
2.43.0
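For quick reference, the five values that used to travel as separate arguments/locals now come out of the vm_fault that hugetlb_fault() passes down. Roughly, based on the hunks above rather than on the actual hugetlb_fault() source (so treat the initializer below as an illustrative sketch, not a quote of the kernel code):

	struct vm_fault vmf = {
		.vma		= vma,
		.address	= address & huge_page_mask(h),	/* was haddr */
		.real_address	= address,			/* was address */
		.flags		= flags,			/* was flags */
	};
	/*
	 * vmf.pte and vmf.ptl are filled in later, once the huge PTE has
	 * been looked up, replacing the old ptep and ptl arguments.
	 */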
On 2024/4/2 04:26, Vishal Moola (Oracle) wrote:
> hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
> This alleviates the stack by consolidating 5 variables into a single
> struct.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> ---
> mm/hugetlb.c | 61 ++++++++++++++++++++++++++--------------------------
> 1 file changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index aca2f11b4138..d4f26947173e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5918,18 +5918,16 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
> * Keep the pte_same checks anyway to make transition from the mutex easier.
> */
> static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> - unsigned long address, pte_t *ptep, unsigned int flags,
> - struct folio *pagecache_folio, spinlock_t *ptl,
> + struct folio *pagecache_folio,
The same as the comment in the previous thread.
Muchun,
Thanks.
On Sun, Apr 07, 2024 at 05:12:42PM +0800, Muchun Song wrote:
> The same as the comment in the previous thread.
And fold the attached patch into here as well please Andrew?
From f4adcf13ecc15a6733af43649756e53457078221 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Mon, 8 Apr 2024 10:21:44 -0700
Subject: [PATCH 2/2] hugetlb: Simplify hugetlb_wp() arguments
To simplify the function arguments, as suggested by Oscar and Muchun.
Suggested-by: Muchun Song <muchun.song@linux.dev>
Suggested-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
mm/hugetlb.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 05fe610f4699..0d96a41efde8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5915,10 +5915,11 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* cannot race with other handlers or page migration.
* Keep the pte_same checks anyway to make transition from the mutex easier.
*/
-static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
- struct folio *pagecache_folio,
+static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
+ struct mm_struct *mm = vma->vm_mm;
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
pte_t pte = huge_ptep_get(vmf->pte);
struct hstate *h = hstate_vma(vma);
@@ -6364,7 +6365,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(mm, vma, folio, vmf);
+ ret = hugetlb_wp(folio, vmf);
}
spin_unlock(vmf->ptl);
@@ -6577,7 +6578,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
if (!huge_pte_write(vmf.orig_pte)) {
- ret = hugetlb_wp(mm, vma, pagecache_folio, &vmf);
+ ret = hugetlb_wp(pagecache_folio, &vmf);
goto out_put_page;
} else if (likely(flags & FAULT_FLAG_WRITE)) {
vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
--
2.43.0
On Mon, Apr 08, 2024 at 10:47:12AM -0700, Vishal Moola wrote:
> -static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> - struct folio *pagecache_folio,
> +static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
> struct vm_fault *vmf)
> {
> + struct vm_area_struct *vma = vmf->vma;
> + struct mm_struct *mm = vma->vm_mm;
I think 'vmf' should be the first parameter, not the second.
Compare:
static vm_fault_t do_page_mkwrite(struct vm_fault *vmf, struct folio *folio)
static inline void wp_page_reuse(struct vm_fault *vmf, struct folio *folio)
static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio)
static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
void set_pte_range(struct vm_fault *vmf, struct folio *folio,
struct page *page, unsigned int nr, unsigned long addr)
int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
unsigned long addr, int page_nid, int *flags)
static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
unsigned long fault_addr, pte_t *fault_pte,
bool writable)
static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
struct folio *folio, pte_t fault_pte,
bool ignore_writable, bool pte_write_upgrade)
static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
numa_migrate_prep() is the only one which doesn't have vmf as the first
param. It's a subtle inconsistency, but one you notice after a while
... then wish you'd done right the first time, but can't quite bring
yourself to submit a patch to fix ;-)
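Spelled out, the vmf-first ordering suggested here would give hugetlb_wp() a prototype along these lines (illustration only, not a posted patch):

	static vm_fault_t hugetlb_wp(struct vm_fault *vmf,
				     struct folio *pagecache_folio);

with the two call sites above becoming hugetlb_wp(vmf, folio) in hugetlb_no_page() and hugetlb_wp(&vmf, pagecache_folio) in hugetlb_fault().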
On Mon, Apr 01, 2024 at 01:26:51PM -0700, Vishal Moola (Oracle) wrote:
> hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
> This alleviates the stack by consolidating 5 variables into a single
> struct.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>

Reviewed-by: Oscar Salvador <osalvador@suse.de>

--
Oscar Salvador
SUSE Labs