From: Zhang Peng <bruzzhang@tencent.com>
shrink_folio_list() contains two large self-contained sections:
the pageout() dispatch state machine and the folio-freeing path
(buffer release, lazyfree, __remove_mapping, folio_batch). Extract
them into pageout_one() and folio_free() respectively to reduce the
size of shrink_folio_list() and make each step independently readable.
No functional change intended.
Suggested-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
---
mm/vmscan.c | 270 ++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 155 insertions(+), 115 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0860a48d5bf3..c8ff742ed891 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1070,6 +1070,153 @@ static void folio_active_bounce(struct folio *folio, struct reclaim_stat *stat,
}
}
+static bool folio_free(struct folio *folio, struct folio_batch *free_folios,
+ struct scan_control *sc, struct reclaim_stat *stat)
+{
+ unsigned int nr_pages = folio_nr_pages(folio);
+ struct address_space *mapping = folio_mapping(folio);
+
+ /*
+ * If the folio has buffers, try to free the buffer
+ * mappings associated with this folio. If we succeed
+ * we try to free the folio as well.
+ *
+ * We do this even if the folio is dirty.
+ * filemap_release_folio() does not perform I/O, but it
+ * is possible for a folio to have the dirty flag set,
+ * but it is actually clean (all its buffers are clean).
+ * This happens if the buffers were written out directly,
+ * with submit_bh(). ext3 will do this, as well as
+ * the blockdev mapping. filemap_release_folio() will
+ * discover that cleanness and will drop the buffers
+ * and mark the folio clean - it can be freed.
+ *
+ * Rarely, folios can have buffers and no ->mapping.
+ * These are the folios which were not successfully
+ * invalidated in truncate_cleanup_folio(). We try to
+ * drop those buffers here and if that worked, and the
+ * folio is no longer mapped into process address space
+ * (refcount == 1) it can be freed. Otherwise, leave
+ * the folio on the LRU so it is swappable.
+ */
+ if (folio_needs_release(folio)) {
+ if (!filemap_release_folio(folio, sc->gfp_mask)) {
+ folio_active_bounce(folio, stat, nr_pages);
+ return false;
+ }
+
+ if (!mapping && folio_ref_count(folio) == 1) {
+ folio_unlock(folio);
+ if (folio_put_testzero(folio))
+ goto free_it;
+ else {
+ /*
+ * rare race with speculative reference.
+ * the speculative reference will free
+ * this folio shortly, so we may
+ * increment nr_reclaimed here (and
+ * leave it off the LRU).
+ */
+ stat->nr_reclaimed += nr_pages;
+ return true;
+ }
+ }
+ }
+
+ if (folio_test_lazyfree(folio)) {
+ /* follow __remove_mapping for reference */
+ if (!folio_ref_freeze(folio, 1))
+ return false;
+ /*
+ * The folio has only one reference left, which is
+ * from the isolation. After the caller puts the
+ * folio back on the lru and drops the reference, the
+ * folio will be freed anyway. It doesn't matter
+ * which lru it goes on. So we don't bother checking
+ * the dirty flag here.
+ */
+ count_vm_events(PGLAZYFREED, nr_pages);
+ count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
+ } else if (!mapping || !__remove_mapping(mapping, folio, true,
+ sc->target_mem_cgroup))
+ return false;
+
+ folio_unlock(folio);
+free_it:
+ /*
+ * Folio may get swapped out as a whole, need to account
+ * all pages in it.
+ */
+ stat->nr_reclaimed += nr_pages;
+
+ folio_unqueue_deferred_split(folio);
+ if (folio_batch_add(free_folios, folio) == 0) {
+ mem_cgroup_uncharge_folios(free_folios);
+ try_to_unmap_flush();
+ free_unref_folios(free_folios);
+ }
+ return true;
+}
+
+static void pageout_one(struct folio *folio, struct list_head *ret_folios,
+ struct folio_batch *free_folios,
+ struct scan_control *sc, struct reclaim_stat *stat,
+ struct swap_iocb **plug, struct list_head *folio_list)
+{
+ struct address_space *mapping = folio_mapping(folio);
+ unsigned int nr_pages = folio_nr_pages(folio);
+
+ switch (pageout(folio, mapping, plug, folio_list)) {
+ case PAGE_ACTIVATE:
+ /*
+ * If shmem folio is split when writeback to swap,
+ * the tail pages will make their own pass through
+ * this function and be accounted then.
+ */
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
+ folio_active_bounce(folio, stat, nr_pages);
+ fallthrough;
+ case PAGE_KEEP:
+ goto locked_keepit;
+ case PAGE_SUCCESS:
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
+ stat->nr_pageout += nr_pages;
+
+ if (folio_test_writeback(folio))
+ goto keepit;
+ if (folio_test_dirty(folio))
+ goto keepit;
+
+ /*
+ * A synchronous write - probably a ramdisk. Go
+ * ahead and try to reclaim the folio.
+ */
+ if (!folio_trylock(folio))
+ goto keepit;
+ if (folio_test_dirty(folio) ||
+ folio_test_writeback(folio))
+ goto locked_keepit;
+ mapping = folio_mapping(folio);
+ fallthrough;
+ case PAGE_CLEAN:
+ ; /* try to free the folio below */
+ }
+ if (folio_free(folio, free_folios, sc, stat))
+ return;
+locked_keepit:
+ folio_unlock(folio);
+keepit:
+ list_add(&folio->lru, ret_folios);
+ VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
+ folio_test_unevictable(folio), folio);
+}
+
/*
* Reclaimed folios are counted in stat->nr_reclaimed.
*/
@@ -1415,125 +1562,16 @@ static void shrink_folio_list(struct list_head *folio_list,
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
- switch (pageout(folio, mapping, &plug, folio_list)) {
- case PAGE_KEEP:
- goto keep_locked;
- case PAGE_ACTIVATE:
- /*
- * If shmem folio is split when writeback to swap,
- * the tail pages will make their own pass through
- * this function and be accounted then.
- */
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- goto activate_locked;
- case PAGE_SUCCESS:
- if (nr_pages > 1 && !folio_test_large(folio)) {
- sc->nr_scanned -= (nr_pages - 1);
- nr_pages = 1;
- }
- stat->nr_pageout += nr_pages;
-
- if (folio_test_writeback(folio))
- goto keep;
- if (folio_test_dirty(folio))
- goto keep;
-
- /*
- * A synchronous write - probably a ramdisk. Go
- * ahead and try to reclaim the folio.
- */
- if (!folio_trylock(folio))
- goto keep;
- if (folio_test_dirty(folio) ||
- folio_test_writeback(folio))
- goto keep_locked;
- mapping = folio_mapping(folio);
- fallthrough;
- case PAGE_CLEAN:
- ; /* try to free the folio below */
- }
- }
-
- /*
- * If the folio has buffers, try to free the buffer
- * mappings associated with this folio. If we succeed
- * we try to free the folio as well.
- *
- * We do this even if the folio is dirty.
- * filemap_release_folio() does not perform I/O, but it
- * is possible for a folio to have the dirty flag set,
- * but it is actually clean (all its buffers are clean).
- * This happens if the buffers were written out directly,
- * with submit_bh(). ext3 will do this, as well as
- * the blockdev mapping. filemap_release_folio() will
- * discover that cleanness and will drop the buffers
- * and mark the folio clean - it can be freed.
- *
- * Rarely, folios can have buffers and no ->mapping.
- * These are the folios which were not successfully
- * invalidated in truncate_cleanup_folio(). We try to
- * drop those buffers here and if that worked, and the
- * folio is no longer mapped into process address space
- * (refcount == 1) it can be freed. Otherwise, leave
- * the folio on the LRU so it is swappable.
- */
- if (folio_needs_release(folio)) {
- if (!filemap_release_folio(folio, sc->gfp_mask))
- goto activate_locked;
- if (!mapping && folio_ref_count(folio) == 1) {
- folio_unlock(folio);
- if (folio_put_testzero(folio))
- goto free_it;
- else {
- /*
- * rare race with speculative reference.
- * the speculative reference will free
- * this folio shortly, so we may
- * increment nr_reclaimed here (and
- * leave it off the LRU).
- */
- stat->nr_reclaimed += nr_pages;
- continue;
- }
- }
+ pageout_one(folio, &ret_folios, &free_folios, sc, stat,
+ &plug, folio_list);
+ goto next;
}
- if (folio_test_lazyfree(folio)) {
- /* follow __remove_mapping for reference */
- if (!folio_ref_freeze(folio, 1))
- goto keep_locked;
- /*
- * The folio has only one reference left, which is
- * from the isolation. After the caller puts the
- * folio back on the lru and drops the reference, the
- * folio will be freed anyway. It doesn't matter
- * which lru it goes on. So we don't bother checking
- * the dirty flag here.
- */
- count_vm_events(PGLAZYFREED, nr_pages);
- count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
- } else if (!mapping || !__remove_mapping(mapping, folio, true,
- sc->target_mem_cgroup))
+ if (!folio_free(folio, &free_folios, sc, stat))
goto keep_locked;
- folio_unlock(folio);
-free_it:
- /*
- * Folio may get swapped out as a whole, need to account
- * all pages in it.
- */
- stat->nr_reclaimed += nr_pages;
-
- folio_unqueue_deferred_split(folio);
- if (folio_batch_add(&free_folios, folio) == 0) {
- mem_cgroup_uncharge_folios(&free_folios);
- try_to_unmap_flush();
- free_unref_folios(&free_folios);
- }
- continue;
+ else
+ continue;
activate_locked_split:
/*
@@ -1552,6 +1590,8 @@ static void shrink_folio_list(struct list_head *folio_list,
list_add(&folio->lru, &ret_folios);
VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
folio_test_unevictable(folio), folio);
+next:
+ continue;
}
/* 'folio_list' is always empty here */
--
2.43.7
On Fri, Apr 10, 2026 at 8:47 PM Zhang Peng <zippermonkey@icloud.com> wrote:
>
> From: Zhang Peng <bruzzhang@tencent.com>
>
> shrink_folio_list() contains two large self-contained sections:
> the pageout() dispatch state machine and the folio-freeing path
> (buffer release, lazyfree, __remove_mapping, folio_batch). Extract
> them into pageout_one() and folio_free() respectively to reduce the
> size of shrink_folio_list() and make each step independently readable.
This one looks good, but:
>
> No functional change
>
> Suggested-by: Kairui Song <kasong@tencent.com>
> Signed-off-by: Zhang Peng <bruzzhang@tencent.com>
> ---
> mm/vmscan.c | 270 ++++++++++++++++++++++++++++++++++--------------------------
> 1 file changed, 155 insertions(+), 115 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 0860a48d5bf3..c8ff742ed891 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1070,6 +1070,153 @@ static void folio_active_bounce(struct folio *folio, struct reclaim_stat *stat,
> }
> }
>
> +static bool folio_free(struct folio *folio, struct folio_batch *free_folios,
> + struct scan_control *sc, struct reclaim_stat *stat)
> +{
> + unsigned int nr_pages = folio_nr_pages(folio);
> + struct address_space *mapping = folio_mapping(folio);
> +
> + /*
> + * If the folio has buffers, try to free the buffer
> + * mappings associated with this folio. If we succeed
> + * we try to free the folio as well.
> + *
> + * We do this even if the folio is dirty.
> + * filemap_release_folio() does not perform I/O, but it
> + * is possible for a folio to have the dirty flag set,
> + * but it is actually clean (all its buffers are clean).
> + * This happens if the buffers were written out directly,
> + * with submit_bh(). ext3 will do this, as well as
> + * the blockdev mapping. filemap_release_folio() will
> + * discover that cleanness and will drop the buffers
> + * and mark the folio clean - it can be freed.
> + *
> + * Rarely, folios can have buffers and no ->mapping.
> + * These are the folios which were not successfully
> + * invalidated in truncate_cleanup_folio(). We try to
> + * drop those buffers here and if that worked, and the
> + * folio is no longer mapped into process address space
> + * (refcount == 1) it can be freed. Otherwise, leave
> + * the folio on the LRU so it is swappable.
> + */
> + if (folio_needs_release(folio)) {
> + if (!filemap_release_folio(folio, sc->gfp_mask)) {
> + folio_active_bounce(folio, stat, nr_pages);
> + return false;
> + }
> +
> + if (!mapping && folio_ref_count(folio) == 1) {
> + folio_unlock(folio);
> + if (folio_put_testzero(folio))
> + goto free_it;
> + else {
> + /*
> + * rare race with speculative reference.
> + * the speculative reference will free
> + * this folio shortly, so we may
> + * increment nr_reclaimed here (and
> + * leave it off the LRU).
> + */
> + stat->nr_reclaimed += nr_pages;
> + return true;
> + }
> + }
> + }
> +
> + if (folio_test_lazyfree(folio)) {
> + /* follow __remove_mapping for reference */
> + if (!folio_ref_freeze(folio, 1))
> + return false;
> + /*
> + * The folio has only one reference left, which is
> + * from the isolation. After the caller puts the
> + * folio back on the lru and drops the reference, the
> + * folio will be freed anyway. It doesn't matter
> + * which lru it goes on. So we don't bother checking
> + * the dirty flag here.
> + */
> + count_vm_events(PGLAZYFREED, nr_pages);
> + count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
> + } else if (!mapping || !__remove_mapping(mapping, folio, true,
> + sc->target_mem_cgroup))
> + return false;
> +
> + folio_unlock(folio);
> +free_it:
> + /*
> + * Folio may get swapped out as a whole, need to account
> + * all pages in it.
> + */
> + stat->nr_reclaimed += nr_pages;
> +
> + folio_unqueue_deferred_split(folio);
> + if (folio_batch_add(free_folios, folio) == 0) {
> + mem_cgroup_uncharge_folios(free_folios);
> + try_to_unmap_flush();
> + free_unref_folios(free_folios);
> + }
> + return true;
> +}
> +
> +static void pageout_one(struct folio *folio, struct list_head *ret_folios,
> + struct folio_batch *free_folios,
> + struct scan_control *sc, struct reclaim_stat *stat,
> + struct swap_iocb **plug, struct list_head *folio_list)
> +{
> + struct address_space *mapping = folio_mapping(folio);
> + unsigned int nr_pages = folio_nr_pages(folio);
> +
> + switch (pageout(folio, mapping, plug, folio_list)) {
> + case PAGE_ACTIVATE:
> + /*
> + * If shmem folio is split when writeback to swap,
> + * the tail pages will make their own pass through
> + * this function and be accounted then.
> + */
> + if (nr_pages > 1 && !folio_test_large(folio)) {
> + sc->nr_scanned -= (nr_pages - 1);
> + nr_pages = 1;
> + }
> + folio_active_bounce(folio, stat, nr_pages);
> + fallthrough;
> + case PAGE_KEEP:
> + goto locked_keepit;
> + case PAGE_SUCCESS:
> + if (nr_pages > 1 && !folio_test_large(folio)) {
> + sc->nr_scanned -= (nr_pages - 1);
> + nr_pages = 1;
> + }
> + stat->nr_pageout += nr_pages;
> +
> + if (folio_test_writeback(folio))
> + goto keepit;
> + if (folio_test_dirty(folio))
> + goto keepit;
> +
> + /*
> + * A synchronous write - probably a ramdisk. Go
> + * ahead and try to reclaim the folio.
> + */
> + if (!folio_trylock(folio))
> + goto keepit;
> + if (folio_test_dirty(folio) ||
> + folio_test_writeback(folio))
> + goto locked_keepit;
> + mapping = folio_mapping(folio);
> + fallthrough;
> + case PAGE_CLEAN:
> + ; /* try to free the folio below */
> + }
> + if (folio_free(folio, free_folios, sc, stat))
> + return;
> +locked_keepit:
> + folio_unlock(folio);
> +keepit:
> + list_add(&folio->lru, ret_folios);
> + VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
> + folio_test_unevictable(folio), folio);
> +}
Can we at least move the "result" -- whether to keep the folio
or not -- out of the function?
Can we have pageout_one() report its result back to
shrink_folio_list()?
If everything is hidden inside, it's hard to tell what
happened to the folio.
This hides too many details that should be exposed to
shrink_folio_list(), making the reclaim flow harder
to understand.
Thanks
Barry
© 2016 - 2026 Red Hat, Inc.