[PATCH 3/9] mm/rmap: refactor lazyfree unmap commit path to commit_ttu_lazyfree_folio()

Dev Jain posted 9 patches 1 month ago
[PATCH 3/9] mm/rmap: refactor lazyfree unmap commit path to commit_ttu_lazyfree_folio()
Posted by Dev Jain 1 month ago
Clean up the code by refactoring the post-pte-clearing path of lazyfree
folio unmapping, into commit_ttu_lazyfree_folio().

No functional change is intended.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/rmap.c | 93 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 39 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 1fa020edd954a..a61978141ee3f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1966,6 +1966,57 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
 				     FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY);
 }
 
+static inline int commit_ttu_lazyfree_folio(struct vm_area_struct *vma,
+		struct folio *folio, unsigned long address, pte_t *ptep,
+		pte_t pteval, long nr_pages)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int ref_count, map_count;
+
+	/*
+	 * Synchronize with gup_pte_range():
+	 * - clear PTE; barrier; read refcount
+	 * - inc refcount; barrier; read PTE
+	 */
+	smp_mb();
+
+	ref_count = folio_ref_count(folio);
+	map_count = folio_mapcount(folio);
+
+	/*
+	 * Order reads for page refcount and dirty flag
+	 * (see comments in __remove_mapping()).
+	 */
+	smp_rmb();
+
+	if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
+		/*
+		 * redirtied either using the page table or a previously
+		 * obtained GUP reference.
+		 */
+		set_ptes(mm, address, ptep, pteval, nr_pages);
+		folio_set_swapbacked(folio);
+		return 1;
+	}
+
+	if (ref_count != 1 + map_count) {
+		/*
+		 * Additional reference. Could be a GUP reference or any
+		 * speculative reference. GUP users must mark the folio
+		 * dirty if there was a modification. This folio cannot be
+		 * reclaimed right now either way, so act just like nothing
+		 * happened.
+		 * We'll come back here later and detect if the folio was
+		 * dirtied when the additional reference is gone.
+		 */
+		set_ptes(mm, address, ptep, pteval, nr_pages);
+		return 1;
+	}
+
+	add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
+	return 0;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -2227,46 +2278,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 
 			/* MADV_FREE page check */
 			if (!folio_test_swapbacked(folio)) {
-				int ref_count, map_count;
-
-				/*
-				 * Synchronize with gup_pte_range():
-				 * - clear PTE; barrier; read refcount
-				 * - inc refcount; barrier; read PTE
-				 */
-				smp_mb();
-
-				ref_count = folio_ref_count(folio);
-				map_count = folio_mapcount(folio);
-
-				/*
-				 * Order reads for page refcount and dirty flag
-				 * (see comments in __remove_mapping()).
-				 */
-				smp_rmb();
-
-				if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
-					/*
-					 * redirtied either using the page table or a previously
-					 * obtained GUP reference.
-					 */
-					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
-					folio_set_swapbacked(folio);
+				if (commit_ttu_lazyfree_folio(vma, folio, address,
+							      pvmw.pte, pteval,
+							      nr_pages))
 					goto walk_abort;
-				} else if (ref_count != 1 + map_count) {
-					/*
-					 * Additional reference. Could be a GUP reference or any
-					 * speculative reference. GUP users must mark the folio
-					 * dirty if there was a modification. This folio cannot be
-					 * reclaimed right now either way, so act just like nothing
-					 * happened.
-					 * We'll come back here later and detect if the folio was
-					 * dirtied when the additional reference is gone.
-					 */
-					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
-					goto walk_abort;
-				}
-				add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
 				goto discard;
 			}
 
-- 
2.34.1
Re: [PATCH 3/9] mm/rmap: refactor lazyfree unmap commit path to commit_ttu_lazyfree_folio()
Posted by Lorenzo Stoakes (Oracle) 4 weeks, 1 day ago
On Tue, Mar 10, 2026 at 01:00:07PM +0530, Dev Jain wrote:
> Clean up the code by refactoring the post-pte-clearing path of lazyfree
> folio unmapping, into commit_ttu_lazyfree_folio().
>
> No functional change is intended.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

This is a good idea, and we need more refactoring like this in the rmap code,
but comments/nits below.

> ---
>  mm/rmap.c | 93 ++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 54 insertions(+), 39 deletions(-)
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 1fa020edd954a..a61978141ee3f 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1966,6 +1966,57 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
>  				     FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY);
>  }
>
> +static inline int commit_ttu_lazyfree_folio(struct vm_area_struct *vma,

Strange name, maybe lazyfree_range()? Not sure what ttu has to do with
anything...

> +		struct folio *folio, unsigned long address, pte_t *ptep,
> +		pte_t pteval, long nr_pages)

That long nr_pages is really grating now...

> +{

Come on Dev, it's 2026, why on earth are you returning an integer and not a
bool?

Also it would make sense for this to return false if something breaks, otherwise
true.

> +	struct mm_struct *mm = vma->vm_mm;
> +	int ref_count, map_count;
> +
> +	/*
> +	 * Synchronize with gup_pte_range():
> +	 * - clear PTE; barrier; read refcount
> +	 * - inc refcount; barrier; read PTE
> +	 */
> +	smp_mb();
> +
> +	ref_count = folio_ref_count(folio);
> +	map_count = folio_mapcount(folio);
> +
> +	/*
> +	 * Order reads for page refcount and dirty flag
> +	 * (see comments in __remove_mapping()).
> +	 */
> +	smp_rmb();
> +
> +	if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
> +		/*
> +		 * redirtied either using the page table or a previously
> +		 * obtained GUP reference.
> +		 */
> +		set_ptes(mm, address, ptep, pteval, nr_pages);
> +		folio_set_swapbacked(folio);
> +		return 1;
> +	}
> +
> +	if (ref_count != 1 + map_count) {
> +		/*
> +		 * Additional reference. Could be a GUP reference or any
> +		 * speculative reference. GUP users must mark the folio
> +		 * dirty if there was a modification. This folio cannot be
> +		 * reclaimed right now either way, so act just like nothing
> +		 * happened.
> +		 * We'll come back here later and detect if the folio was
> +		 * dirtied when the additional reference is gone.
> +		 */
> +		set_ptes(mm, address, ptep, pteval, nr_pages);
> +		return 1;
> +	}
> +
> +	add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
> +	return 0;
> +}
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument
>   */
> @@ -2227,46 +2278,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>
>  			/* MADV_FREE page check */
>  			if (!folio_test_swapbacked(folio)) {
> -				int ref_count, map_count;
> -
> -				/*
> -				 * Synchronize with gup_pte_range():
> -				 * - clear PTE; barrier; read refcount
> -				 * - inc refcount; barrier; read PTE
> -				 */
> -				smp_mb();
> -
> -				ref_count = folio_ref_count(folio);
> -				map_count = folio_mapcount(folio);
> -
> -				/*
> -				 * Order reads for page refcount and dirty flag
> -				 * (see comments in __remove_mapping()).
> -				 */
> -				smp_rmb();
> -
> -				if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
> -					/*
> -					 * redirtied either using the page table or a previously
> -					 * obtained GUP reference.
> -					 */
> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
> -					folio_set_swapbacked(folio);
> +				if (commit_ttu_lazyfree_folio(vma, folio, address,
> +							      pvmw.pte, pteval,
> +							      nr_pages))

With above corrections this would be:

	if (!lazyfree_range(vma, folio, address, pvme.pte, pteval, nr_pages))
	   ...

>  					goto walk_abort;
> -				} else if (ref_count != 1 + map_count) {
> -					/*
> -					 * Additional reference. Could be a GUP reference or any
> -					 * speculative reference. GUP users must mark the folio
> -					 * dirty if there was a modification. This folio cannot be
> -					 * reclaimed right now either way, so act just like nothing
> -					 * happened.
> -					 * We'll come back here later and detect if the folio was
> -					 * dirtied when the additional reference is gone.
> -					 */
> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
> -					goto walk_abort;
> -				}
> -				add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
>  				goto discard;
>  			}
>
> --
> 2.34.1
>

Thanks, Lorenzo
Re: [PATCH 3/9] mm/rmap: refactor lazyfree unmap commit path to commit_ttu_lazyfree_folio()
Posted by Dev Jain 4 weeks, 1 day ago

On 10/03/26 1:49 pm, Lorenzo Stoakes (Oracle) wrote:
> On Tue, Mar 10, 2026 at 01:00:07PM +0530, Dev Jain wrote:
>> Clean up the code by refactoring the post-pte-clearing path of lazyfree
>> folio unmapping, into commit_ttu_lazyfree_folio().
>>
>> No functional change is intended.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
> 
> This is a good idea, and we need more refactoring like this in the rmap code,
> but comments/nits below.
> 
>> ---
>>  mm/rmap.c | 93 ++++++++++++++++++++++++++++++++-----------------------
>>  1 file changed, 54 insertions(+), 39 deletions(-)
>>
>> diff --git a/mm/rmap.c b/mm/rmap.c
>> index 1fa020edd954a..a61978141ee3f 100644
>> --- a/mm/rmap.c
>> +++ b/mm/rmap.c
>> @@ -1966,6 +1966,57 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
>>  				     FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY);
>>  }
>>
>> +static inline int commit_ttu_lazyfree_folio(struct vm_area_struct *vma,
> 
> Strange name, maybe lazyfree_range()? Not sure what ttu has to do with

ttu means try_to_unmap, just like it is used in TTU_SYNC,
TTU_SPLIT_HUGE_PMD, etc. So personally I really like the name, it reads
"commit the try-to-unmap of a lazyfree folio". The "commit" comes because
the pte clearing has already happened, so now we are deciding if at all
to back-off and restore the ptes.

> anything...
> 
>> +		struct folio *folio, unsigned long address, pte_t *ptep,
>> +		pte_t pteval, long nr_pages)
> 
> That long nr_pages is really grating now...

Reading the discussion on patch 1, I'll convert this to unsigned long.

> 
>> +{
> 
> Come on Dev, it's 2026, why on earth are you returning an integer and not a
> bool?
> 
> Also it would make sense for this to return false if something breaks, otherwise
> true.

Yes I was confused on which one of the options to choose :). Since the
function does a lot more than just test some functionality (which is what
boolean functions usually do) I was feeling weird when returning bool.
But yeah alright, I'll convert this to bool.

> 
>> +	struct mm_struct *mm = vma->vm_mm;
>> +	int ref_count, map_count;
>> +
>> +	/*
>> +	 * Synchronize with gup_pte_range():
>> +	 * - clear PTE; barrier; read refcount
>> +	 * - inc refcount; barrier; read PTE
>> +	 */
>> +	smp_mb();
>> +
>> +	ref_count = folio_ref_count(folio);
>> +	map_count = folio_mapcount(folio);
>> +
>> +	/*
>> +	 * Order reads for page refcount and dirty flag
>> +	 * (see comments in __remove_mapping()).
>> +	 */
>> +	smp_rmb();
>> +
>> +	if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
>> +		/*
>> +		 * redirtied either using the page table or a previously
>> +		 * obtained GUP reference.
>> +		 */
>> +		set_ptes(mm, address, ptep, pteval, nr_pages);
>> +		folio_set_swapbacked(folio);
>> +		return 1;
>> +	}
>> +
>> +	if (ref_count != 1 + map_count) {
>> +		/*
>> +		 * Additional reference. Could be a GUP reference or any
>> +		 * speculative reference. GUP users must mark the folio
>> +		 * dirty if there was a modification. This folio cannot be
>> +		 * reclaimed right now either way, so act just like nothing
>> +		 * happened.
>> +		 * We'll come back here later and detect if the folio was
>> +		 * dirtied when the additional reference is gone.
>> +		 */
>> +		set_ptes(mm, address, ptep, pteval, nr_pages);
>> +		return 1;
>> +	}
>> +
>> +	add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
>> +	return 0;
>> +}
>> +
>>  /*
>>   * @arg: enum ttu_flags will be passed to this argument
>>   */
>> @@ -2227,46 +2278,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>>
>>  			/* MADV_FREE page check */
>>  			if (!folio_test_swapbacked(folio)) {
>> -				int ref_count, map_count;
>> -
>> -				/*
>> -				 * Synchronize with gup_pte_range():
>> -				 * - clear PTE; barrier; read refcount
>> -				 * - inc refcount; barrier; read PTE
>> -				 */
>> -				smp_mb();
>> -
>> -				ref_count = folio_ref_count(folio);
>> -				map_count = folio_mapcount(folio);
>> -
>> -				/*
>> -				 * Order reads for page refcount and dirty flag
>> -				 * (see comments in __remove_mapping()).
>> -				 */
>> -				smp_rmb();
>> -
>> -				if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
>> -					/*
>> -					 * redirtied either using the page table or a previously
>> -					 * obtained GUP reference.
>> -					 */
>> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
>> -					folio_set_swapbacked(folio);
>> +				if (commit_ttu_lazyfree_folio(vma, folio, address,
>> +							      pvmw.pte, pteval,
>> +							      nr_pages))
> 
> With above corrections this would be:
> 
> 	if (!lazyfree_range(vma, folio, address, pvme.pte, pteval, nr_pages))
> 	   ...
> 
>>  					goto walk_abort;
>> -				} else if (ref_count != 1 + map_count) {
>> -					/*
>> -					 * Additional reference. Could be a GUP reference or any
>> -					 * speculative reference. GUP users must mark the folio
>> -					 * dirty if there was a modification. This folio cannot be
>> -					 * reclaimed right now either way, so act just like nothing
>> -					 * happened.
>> -					 * We'll come back here later and detect if the folio was
>> -					 * dirtied when the additional reference is gone.
>> -					 */
>> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
>> -					goto walk_abort;
>> -				}
>> -				add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
>>  				goto discard;
>>  			}
>>
>> --
>> 2.34.1
>>
> 
> Thanks, Lorenzo
>
Re: [PATCH 3/9] mm/rmap: refactor lazyfree unmap commit path to commit_ttu_lazyfree_folio()
Posted by Lorenzo Stoakes (Oracle) 2 weeks, 6 days ago
On Tue, Mar 10, 2026 at 02:12:45PM +0530, Dev Jain wrote:
>
>
> On 10/03/26 1:49 pm, Lorenzo Stoakes (Oracle) wrote:
> > On Tue, Mar 10, 2026 at 01:00:07PM +0530, Dev Jain wrote:
> >> Clean up the code by refactoring the post-pte-clearing path of lazyfree
> >> folio unmapping, into commit_ttu_lazyfree_folio().
> >>
> >> No functional change is intended.
> >>
> >> Signed-off-by: Dev Jain <dev.jain@arm.com>
> >
> > This is a good idea, and we need more refactoring like this in the rmap code,
> > but comments/nits below.
> >
> >> ---
> >>  mm/rmap.c | 93 ++++++++++++++++++++++++++++++++-----------------------
> >>  1 file changed, 54 insertions(+), 39 deletions(-)
> >>
> >> diff --git a/mm/rmap.c b/mm/rmap.c
> >> index 1fa020edd954a..a61978141ee3f 100644
> >> --- a/mm/rmap.c
> >> +++ b/mm/rmap.c
> >> @@ -1966,6 +1966,57 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
> >>  				     FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY);
> >>  }
> >>
> >> +static inline int commit_ttu_lazyfree_folio(struct vm_area_struct *vma,
> >
> > Strange name, maybe lazyfree_range()? Not sure what ttu has to do with
>
> ttu means try_to_unmap, just like it is used in TTU_SYNC,
> TTU_SPLIT_HUGE_PMD, etc. So personally I really like the name, it reads
> "commit the try-to-unmap of a lazyfree folio". The "commit" comes because
> the pte clearing has already happened, so now we are deciding if at all
> to back-off and restore the ptes.

I absolutely hate the name, and nobody sane is going to read it like that
sorry :) I think this is a case of being too close to the work.

I also hate the overloading of 'lazyfree' which is actually MADV_FREE,
users don't know what lazyfree is and it's a bit of an overloaded term
(we've had discussions of a new 'lazy free' implementation at conferences
before).

Commit is overloaded everywhere I'm not sure it's even particularly
pertinent here.

Also it's not necessarily a folio is it? It could be a contpte range within
a folio so that's just misleading...

lazyfree_pte_range() or something like that seems better to me.

>
> > anything...
> >
> >> +		struct folio *folio, unsigned long address, pte_t *ptep,
> >> +		pte_t pteval, long nr_pages)
> >
> > That long nr_pages is really grating now...
>
> Reading the discussion on patch 1, I'll convert this to unsigned long.

Thanks!

>
> >
> >> +{
> >
> > Come on Dev, it's 2026, why on earth are you returning an integer and not a
> > bool?
> >
> > Also it would make sense for this to return false if something breaks, otherwise
> > true.
>
> Yes I was confused on which one of the options to choose :). Since the
> function does a lot more than just test some functionality (which is what
> boolean functions usually do) I was feeling weird when returning bool.
> But yeah alright, I'll convert this to bool.

Thanks!

Cheers, Lorenzo

>
> >
> >> +	struct mm_struct *mm = vma->vm_mm;
> >> +	int ref_count, map_count;
> >> +
> >> +	/*
> >> +	 * Synchronize with gup_pte_range():
> >> +	 * - clear PTE; barrier; read refcount
> >> +	 * - inc refcount; barrier; read PTE
> >> +	 */
> >> +	smp_mb();
> >> +
> >> +	ref_count = folio_ref_count(folio);
> >> +	map_count = folio_mapcount(folio);
> >> +
> >> +	/*
> >> +	 * Order reads for page refcount and dirty flag
> >> +	 * (see comments in __remove_mapping()).
> >> +	 */
> >> +	smp_rmb();
> >> +
> >> +	if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
> >> +		/*
> >> +		 * redirtied either using the page table or a previously
> >> +		 * obtained GUP reference.
> >> +		 */
> >> +		set_ptes(mm, address, ptep, pteval, nr_pages);
> >> +		folio_set_swapbacked(folio);
> >> +		return 1;
> >> +	}
> >> +
> >> +	if (ref_count != 1 + map_count) {
> >> +		/*
> >> +		 * Additional reference. Could be a GUP reference or any
> >> +		 * speculative reference. GUP users must mark the folio
> >> +		 * dirty if there was a modification. This folio cannot be
> >> +		 * reclaimed right now either way, so act just like nothing
> >> +		 * happened.
> >> +		 * We'll come back here later and detect if the folio was
> >> +		 * dirtied when the additional reference is gone.
> >> +		 */
> >> +		set_ptes(mm, address, ptep, pteval, nr_pages);
> >> +		return 1;
> >> +	}
> >> +
> >> +	add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
> >> +	return 0;
> >> +}
> >> +
> >>  /*
> >>   * @arg: enum ttu_flags will be passed to this argument
> >>   */
> >> @@ -2227,46 +2278,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
> >>
> >>  			/* MADV_FREE page check */
> >>  			if (!folio_test_swapbacked(folio)) {
> >> -				int ref_count, map_count;
> >> -
> >> -				/*
> >> -				 * Synchronize with gup_pte_range():
> >> -				 * - clear PTE; barrier; read refcount
> >> -				 * - inc refcount; barrier; read PTE
> >> -				 */
> >> -				smp_mb();
> >> -
> >> -				ref_count = folio_ref_count(folio);
> >> -				map_count = folio_mapcount(folio);
> >> -
> >> -				/*
> >> -				 * Order reads for page refcount and dirty flag
> >> -				 * (see comments in __remove_mapping()).
> >> -				 */
> >> -				smp_rmb();
> >> -
> >> -				if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
> >> -					/*
> >> -					 * redirtied either using the page table or a previously
> >> -					 * obtained GUP reference.
> >> -					 */
> >> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
> >> -					folio_set_swapbacked(folio);
> >> +				if (commit_ttu_lazyfree_folio(vma, folio, address,
> >> +							      pvmw.pte, pteval,
> >> +							      nr_pages))
> >
> > With above corrections this would be:
> >
> > 	if (!lazyfree_range(vma, folio, address, pvme.pte, pteval, nr_pages))
> > 	   ...
> >
> >>  					goto walk_abort;
> >> -				} else if (ref_count != 1 + map_count) {
> >> -					/*
> >> -					 * Additional reference. Could be a GUP reference or any
> >> -					 * speculative reference. GUP users must mark the folio
> >> -					 * dirty if there was a modification. This folio cannot be
> >> -					 * reclaimed right now either way, so act just like nothing
> >> -					 * happened.
> >> -					 * We'll come back here later and detect if the folio was
> >> -					 * dirtied when the additional reference is gone.
> >> -					 */
> >> -					set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
> >> -					goto walk_abort;
> >> -				}
> >> -				add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
> >>  				goto discard;
> >>  			}
> >>
> >> --
> >> 2.34.1
> >>
> >
> > Thanks, Lorenzo
> >
>