[PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios

Barry Song posted 1 patch 2 months, 2 weeks ago
There is a newer version of this series
include/linux/mm_inline.h |  5 +-
include/linux/mmzone.h    |  2 +-
mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
3 files changed, 61 insertions(+), 43 deletions(-)
[PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by Barry Song 2 months, 2 weeks ago
From: Barry Song <v-songbaohua@oppo.com>

This follows up on the discussion regarding Gaoxu's work[1]. It's
unclear if there's still interest in implementing a separate LRU
list for lazyfree folios, but I decided to explore it out of
curiosity.

According to Lokesh, MADV_FREE'd anon folios are expected to be
released earlier than file folios. One option, as implemented
by Gaoxu, is to place lazyfree anon folios at the tail of the
file's `min_seq` generation. However, this approach results in
lazyfree folios being released in a LIFO manner, which conflicts
with LRU behavior, as noted by Michal.

To address this, this patch proposes maintaining a separate list 
for lazyfree anon folios while keeping them classified under the 
"file" LRU type to minimize code changes. These lazyfree anon 
folios will still be counted as file folios and share the same 
generation with regular files. In the eviction path, the lazyfree 
list will be prioritized for scanning before the actual file 
LRU list.

[1] https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor.com/

Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 include/linux/mm_inline.h |  5 +-
 include/linux/mmzone.h    |  2 +-
 mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
 3 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f4fe593c1400..118d70ed3120 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	int gen = folio_lru_gen(folio);
 	int type = folio_is_file_lru(folio);
 	int zone = folio_zonenum(folio);
+	int lazyfree = type ? folio_test_anon(folio) : 0;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 
 	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
@@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	lru_gen_update_size(lruvec, folio, -1, gen);
 	/* for folio_rotate_reclaimable() */
 	if (reclaiming)
-		list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 	else
-		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 
 	return true;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4a2835..5d2331778528 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -434,7 +434,7 @@ struct lru_gen_folio {
 	/* the birth time of each generation in jiffies */
 	unsigned long timestamps[MAX_NR_GENS];
 	/* the multi-gen LRU lists, lazily sorted on eviction */
-	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE + 1][MAX_NR_ZONES];
 	/* the multi-gen LRU sizes, eventually consistent */
 	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
 	/* the exponential moving average of refaulted */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 96abf4a52382..9dc665dc6ba9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 
 	/* prevent cold/hot inversion if force_scan is true */
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-		struct list_head *head = &lrugen->folios[old_gen][type][zone];
+		int list_num = type ? 2 : 1;
+		struct list_head *head;
 
-		while (!list_empty(head)) {
-			struct folio *folio = lru_to_folio(head);
+		for (int i = list_num - 1; i >= 0; i--) {
+			head = &lrugen->folios[old_gen][type + i][zone];
+			while (!list_empty(head)) {
+				struct folio *folio = lru_to_folio(head);
 
-			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
 
-			new_gen = folio_inc_gen(lruvec, folio, false);
-			list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
+				new_gen = folio_inc_gen(lruvec, folio, false);
+				list_move_tail(&folio->lru, &lrugen->folios[new_gen][type + i][zone]);
 
-			if (!--remaining)
-				return false;
+				if (!--remaining)
+					return false;
+			}
 		}
 	}
 done:
@@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	int refs = folio_lru_refs(folio);
 	int tier = lru_tier_from_refs(refs);
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
+	int lazyfree = type ? folio_test_anon(folio) : 0;
 
 	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
 
@@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 
 	/* promoted */
 	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
-		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
 
 		gen = folio_inc_gen(lruvec, folio, false);
-		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 
 		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
 			   lrugen->protected[hist][type][tier - 1] + delta);
@@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	/* ineligible */
 	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
 		gen = folio_inc_gen(lruvec, folio, false);
-		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
 	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
 		gen = folio_inc_gen(lruvec, folio, true);
-		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
+		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
 		return true;
 	}
 
@@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
 static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 		       int type, int tier, struct list_head *list)
 {
-	int i;
+	int i, j;
 	int gen;
 	enum vm_event_item item;
 	int sorted = 0;
@@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 		LIST_HEAD(moved);
 		int skipped_zone = 0;
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
-		struct list_head *head = &lrugen->folios[gen][type][zone];
-
-		while (!list_empty(head)) {
-			struct folio *folio = lru_to_folio(head);
-			int delta = folio_nr_pages(folio);
-
-			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
-			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
-
-			scanned += delta;
+		int list_num = type ? 2 : 1;
+		struct list_head *head;
+
+		for (j = list_num - 1; j >= 0; j--) {
+			head = &lrugen->folios[gen][type + j][zone];
+			while (!list_empty(head)) {
+				struct folio *folio = lru_to_folio(head);
+				int delta = folio_nr_pages(folio);
+
+				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
+				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
+
+				scanned += delta;
+
+				if (sort_folio(lruvec, folio, sc, tier))
+					sorted += delta;
+				else if (isolate_folio(lruvec, folio, sc)) {
+					list_add(&folio->lru, list);
+					isolated += delta;
+				} else {
+					list_move(&folio->lru, &moved);
+					skipped_zone += delta;
+				}
 
-			if (sort_folio(lruvec, folio, sc, tier))
-				sorted += delta;
-			else if (isolate_folio(lruvec, folio, sc)) {
-				list_add(&folio->lru, list);
-				isolated += delta;
-			} else {
-				list_move(&folio->lru, &moved);
-				skipped_zone += delta;
+				if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
+					goto isolate_done;
 			}
-
-			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
-				break;
 		}
 
+isolate_done:
 		if (skipped_zone) {
 			list_splice(&moved, head);
 			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
@@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 	for (i = 0; i <= MIN_NR_GENS + 1; i++)
 		lrugen->timestamps[i] = jiffies;
 
-	for_each_gen_type_zone(gen, type, zone)
+	for_each_gen_type_zone(gen, type, zone) {
 		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
+		/*
+		 * lazyfree anon folios have a separate list while using
+		 * file as type
+		 */
+		if (type)
+			INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
+	}
 
 	if (mm_state)
 		mm_state->seq = MIN_NR_GENS;
-- 
2.39.3 (Apple Git-146)
Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by gaoxu 2 months, 1 week ago
> 
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's
> unclear if there's still interest in implementing a separate LRU
> list for lazyfree folios, but I decided to explore it out of
> curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be
> released earlier than file folios. One option, as implemented
> by Gao Xu, is to place lazyfree anon folios at the tail of the
> file's `min_seq` generation. However, this approach results in
> lazyfree folios being released in a LIFO manner, which conflicts
> with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list
> for lazyfree anon folios while keeping them classified under the
> "file" LRU type to minimize code changes. These lazyfree anon
> folios will still be counted as file folios and share the same
> generation with regular files. In the eviction path, the lazyfree
> list will be prioritized for scanning before the actual file
> LRU list.
This seems like a very feasible solution. I will run comparative tests
based on this patch and share the results (this will take some time).
Thanks to Barry for providing the patch!
> 
> [1] https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor.com/
> 
> Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> ---
>  include/linux/mm_inline.h |  5 +-
>  include/linux/mmzone.h    |  2 +-
>  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
>  3 files changed, 61 insertions(+), 43 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index f4fe593c1400..118d70ed3120 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	int gen = folio_lru_gen(folio);
>  	int type = folio_is_file_lru(folio);
>  	int zone = folio_zonenum(folio);
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
> @@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	lru_gen_update_size(lruvec, folio, -1, gen);
>  	/* for folio_rotate_reclaimable() */
>  	if (reclaiming)
> -		list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  	else
> -		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> 
>  	return true;
>  }
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4a2835..5d2331778528 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>  	/* the birth time of each generation in jiffies */
>  	unsigned long timestamps[MAX_NR_GENS];
>  	/* the multi-gen LRU lists, lazily sorted on eviction */
> -	struct list_head
> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> 1][MAX_NR_ZONES];
For better readability and future extensibility, we could use an enum
instead of bare numbers, e.g. by creating a new type such as enum folio_type.
>  	/* the multi-gen LRU sizes, eventually consistent */
>  	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>  	/* the exponential moving average of refaulted */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 96abf4a52382..9dc665dc6ba9 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> type, bool can_swap)
> 
>  	/* prevent cold/hot inversion if force_scan is true */
>  	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> -		struct list_head *head = &lrugen->folios[old_gen][type][zone];
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> 
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> +		for (int i = list_num - 1; i >= 0; i--) {
> +			head = &lrugen->folios[old_gen][type + i][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> 
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> 
> -			new_gen = folio_inc_gen(lruvec, folio, false);
> -			list_move_tail(&folio->lru,
> &lrugen->folios[new_gen][type][zone]);
> +				new_gen = folio_inc_gen(lruvec, folio, false);
> +				list_move_tail(&folio->lru, &lrugen->folios[new_gen][type +
> i][zone]);
> 
> -			if (!--remaining)
> -				return false;
> +				if (!--remaining)
> +					return false;
> +			}
>  		}
>  	}
>  done:
> @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	int refs = folio_lru_refs(folio);
>  	int tier = lru_tier_from_refs(refs);
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> 
> @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> 
>  	/* promoted */
>  	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> 
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
> 
>  		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>  			   lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	/* ineligible */
>  	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>  	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
>  	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
>  		gen = folio_inc_gen(lruvec, folio, true);
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct
> folio *folio, struct sca
>  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  		       int type, int tier, struct list_head *list)
>  {
> -	int i;
> +	int i, j;
>  	int gen;
>  	enum vm_event_item item;
>  	int sorted = 0;
> @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct
> scan_control *sc,
>  		LIST_HEAD(moved);
>  		int skipped_zone = 0;
>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> -		struct list_head *head = &lrugen->folios[gen][type][zone];
> -
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> -			int delta = folio_nr_pages(folio);
> -
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> -
> -			scanned += delta;
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> +
> +		for (j = list_num - 1; j >= 0; j--) {
> +			head = &lrugen->folios[gen][type + j][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> +				int delta = folio_nr_pages(folio);
> +
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> +				VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> +
> +				scanned += delta;
> +
> +				if (sort_folio(lruvec, folio, sc, tier))
> +					sorted += delta;
> +				else if (isolate_folio(lruvec, folio, sc)) {
> +					list_add(&folio->lru, list);
> +					isolated += delta;
> +				} else {
> +					list_move(&folio->lru, &moved);
> +					skipped_zone += delta;
> +				}
> 
> -			if (sort_folio(lruvec, folio, sc, tier))
> -				sorted += delta;
> -			else if (isolate_folio(lruvec, folio, sc)) {
> -				list_add(&folio->lru, list);
> -				isolated += delta;
> -			} else {
> -				list_move(&folio->lru, &moved);
> -				skipped_zone += delta;
> +				if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> +					goto isolate_done;
>  			}
> -
> -			if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> -				break;
>  		}
> 
> +isolate_done:
>  		if (skipped_zone) {
>  			list_splice(&moved, head);
>  			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
>  	for (i = 0; i <= MIN_NR_GENS + 1; i++)
>  		lrugen->timestamps[i] = jiffies;
> 
> -	for_each_gen_type_zone(gen, type, zone)
> +	for_each_gen_type_zone(gen, type, zone) {
>  		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> +		/*
> +		 * lazyfree anon folios have a separate list while using
> +		 * file as type
> +		 */
> +		if (type)
> +			INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
> +	}
> 
>  	if (mm_state)
>  		mm_state->seq = MIN_NR_GENS;
> --
> 2.39.3 (Apple Git-146)

Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by Barry Song 2 months, 1 week ago
On Wed, Sep 18, 2024 at 6:19 PM gaoxu <gaoxu2@honor.com> wrote:
>
> >
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > unclear if there's still interest in implementing a separate LRU
> > list for lazyfree folios, but I decided to explore it out of
> > curiosity.
> >
> > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > released earlier than file folios. One option, as implemented
> > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > file's `min_seq` generation. However, this approach results in
> > lazyfree folios being released in a LIFO manner, which conflicts
> > with LRU behavior, as noted by Michal.
> >
> > To address this, this patch proposes maintaining a separate list
> > for lazyfree anon folios while keeping them classified under the
> > "file" LRU type to minimize code changes. These lazyfree anon
> > folios will still be counted as file folios and share the same
> > generation with regular files. In the eviction path, the lazyfree
> > list will be prioritized for scanning before the actual file
> > LRU list.
> It seems like a very feasible solution. I will conduct comparative tests
> based on this patch and synchronize the test results (it will take some time);
> Thanks to Barry for providing the patch!

Thank you, I will await your test results.

> >
> > [1] https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor.com/
> >
> > Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> > ---
> >  include/linux/mm_inline.h |  5 +-
> >  include/linux/mmzone.h    |  2 +-
> >  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
> >  3 files changed, 61 insertions(+), 43 deletions(-)
> >
> > diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> > index f4fe593c1400..118d70ed3120 100644
> > --- a/include/linux/mm_inline.h
> > +++ b/include/linux/mm_inline.h
> > @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> > *lruvec, struct folio *folio,
> >       int gen = folio_lru_gen(folio);
> >       int type = folio_is_file_lru(folio);
> >       int zone = folio_zonenum(folio);
> > +     int lazyfree = type ? folio_test_anon(folio) : 0;
> >       struct lru_gen_folio *lrugen = &lruvec->lrugen;
> >
> >       VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
> > @@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec
> > *lruvec, struct folio *folio,
> >       lru_gen_update_size(lruvec, folio, -1, gen);
> >       /* for folio_rotate_reclaimable() */
> >       if (reclaiming)
> > -             list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >       else
> > -             list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >
> >       return true;
> >  }
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 17506e4a2835..5d2331778528 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -434,7 +434,7 @@ struct lru_gen_folio {
> >       /* the birth time of each generation in jiffies */
> >       unsigned long timestamps[MAX_NR_GENS];
> >       /* the multi-gen LRU lists, lazily sorted on eviction */
> > -     struct list_head
> > folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> > +     struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> > 1][MAX_NR_ZONES];
> For better understanding and future scalability, could use enum types
> instead of numbers, Create a new type, such as: enum folio_type.

I'd rather follow the "trick" that Yu Zhao has been using, such as
int type = folio_is_file_lru(folio);
though I agree that providing two macros as below
#define LRU_TYPE_ANON 0
#define LRU_TYPE_FILE   1

might improve readability in places like:

int list_num = (type  == LRU_TYPE_FILE) ? 2 : 1;

However, considering the code in a larger context, since type =
folio_is_file_lru(folio), doesn't a non-zero type already imply the file
type? If so, the comparison type == LRU_TYPE_FILE seems redundant.

So, if we want to continue using this approach, it seems that there’s nothing
worth changing?

> >       /* the multi-gen LRU sizes, eventually consistent */
> >       long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> >       /* the exponential moving average of refaulted */
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 96abf4a52382..9dc665dc6ba9 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> > type, bool can_swap)
> >
> >       /* prevent cold/hot inversion if force_scan is true */
> >       for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> > -             struct list_head *head = &lrugen->folios[old_gen][type][zone];
> > +             int list_num = type ? 2 : 1;
> > +             struct list_head *head;
> >
> > -             while (!list_empty(head)) {
> > -                     struct folio *folio = lru_to_folio(head);
> > +             for (int i = list_num - 1; i >= 0; i--) {
> > +                     head = &lrugen->folios[old_gen][type + i][zone];
> > +                     while (!list_empty(head)) {
> > +                             struct folio *folio = lru_to_folio(head);
> >
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> >
> > -                     new_gen = folio_inc_gen(lruvec, folio, false);
> > -                     list_move_tail(&folio->lru,
> > &lrugen->folios[new_gen][type][zone]);
> > +                             new_gen = folio_inc_gen(lruvec, folio, false);
> > +                             list_move_tail(&folio->lru, &lrugen->folios[new_gen][type +
> > i][zone]);
> >
> > -                     if (!--remaining)
> > -                             return false;
> > +                             if (!--remaining)
> > +                                     return false;
> > +                     }
> >               }
> >       }
> >  done:
> > @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       int refs = folio_lru_refs(folio);
> >       int tier = lru_tier_from_refs(refs);
> >       struct lru_gen_folio *lrugen = &lruvec->lrugen;
> > +     int lazyfree = type ? folio_test_anon(folio) : 0;
> >
> >       VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> >
> > @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >
> >       /* promoted */
> >       if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> > -             list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >               int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> >
> >               gen = folio_inc_gen(lruvec, folio, false);
> > -             list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> > lazyfree][zone]);
> >
> >               WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
> >                          lrugen->protected[hist][type][tier - 1] + delta);
> > @@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       /* ineligible */
> >       if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
> >               gen = folio_inc_gen(lruvec, folio, false);
> > -             list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> > lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> > *folio, struct scan_c
> >       if (folio_test_locked(folio) || folio_test_writeback(folio) ||
> >           (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
> >               gen = folio_inc_gen(lruvec, folio, true);
> > -             list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> > +             list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> >               return true;
> >       }
> >
> > @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct
> > folio *folio, struct sca
> >  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> >                      int type, int tier, struct list_head *list)
> >  {
> > -     int i;
> > +     int i, j;
> >       int gen;
> >       enum vm_event_item item;
> >       int sorted = 0;
> > @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct
> > scan_control *sc,
> >               LIST_HEAD(moved);
> >               int skipped_zone = 0;
> >               int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> > -             struct list_head *head = &lrugen->folios[gen][type][zone];
> > -
> > -             while (!list_empty(head)) {
> > -                     struct folio *folio = lru_to_folio(head);
> > -                     int delta = folio_nr_pages(folio);
> > -
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > -                     VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > -
> > -                     scanned += delta;
> > +             int list_num = type ? 2 : 1;
> > +             struct list_head *head;
> > +
> > +             for (j = list_num - 1; j >= 0; j--) {
> > +                     head = &lrugen->folios[gen][type + j][zone];
> > +                     while (!list_empty(head)) {
> > +                             struct folio *folio = lru_to_folio(head);
> > +                             int delta = folio_nr_pages(folio);
> > +
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> > folio);
> > +                             VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> > folio);
> > +
> > +                             scanned += delta;
> > +
> > +                             if (sort_folio(lruvec, folio, sc, tier))
> > +                                     sorted += delta;
> > +                             else if (isolate_folio(lruvec, folio, sc)) {
> > +                                     list_add(&folio->lru, list);
> > +                                     isolated += delta;
> > +                             } else {
> > +                                     list_move(&folio->lru, &moved);
> > +                                     skipped_zone += delta;
> > +                             }
> >
> > -                     if (sort_folio(lruvec, folio, sc, tier))
> > -                             sorted += delta;
> > -                     else if (isolate_folio(lruvec, folio, sc)) {
> > -                             list_add(&folio->lru, list);
> > -                             isolated += delta;
> > -                     } else {
> > -                             list_move(&folio->lru, &moved);
> > -                             skipped_zone += delta;
> > +                             if (!--remaining || max(isolated, skipped_zone) >=
> > MIN_LRU_BATCH)
> > +                                     goto isolate_done;
> >                       }
> > -
> > -                     if (!--remaining || max(isolated, skipped_zone) >=
> > MIN_LRU_BATCH)
> > -                             break;
> >               }
> >
> > +isolate_done:
> >               if (skipped_zone) {
> >                       list_splice(&moved, head);
> >                       __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> > @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
> >       for (i = 0; i <= MIN_NR_GENS + 1; i++)
> >               lrugen->timestamps[i] = jiffies;
> >
> > -     for_each_gen_type_zone(gen, type, zone)
> > +     for_each_gen_type_zone(gen, type, zone) {
> >               INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> > +             /*
> > +              * lazyfree anon folios have a separate list while using
> > +              * file as type
> > +              */
> > +             if (type)
> > +                     INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
> > +     }
> >
> >       if (mm_state)
> >               mm_state->seq = MIN_NR_GENS;
> > --
> > 2.39.3 (Apple Git-146)
>

Thanks
Barry
Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by David Hildenbrand 2 months, 1 week ago
On 14.09.24 08:37, Barry Song wrote:
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's
> unclear if there's still interest in implementing a separate LRU
> list for lazyfree folios, but I decided to explore it out of
> curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be
> released earlier than file folios. One option, as implemented
> by Gao Xu, is to place lazyfree anon folios at the tail of the
> file's `min_seq` generation. However, this approach results in
> lazyfree folios being released in a LIFO manner, which conflicts
> with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list
> for lazyfree anon folios while keeping them classified under the
> "file" LRU type to minimize code changes. These lazyfree anon
> folios will still be counted as file folios and share the same
> generation with regular files. In the eviction path, the lazyfree
> list will be prioritized for scanning before the actual file
> LRU list.
> 

What's the downside of another LRU list? Do we have any experience on that?

-- 
Cheers,

David / dhildenb
Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by Barry Song 2 months, 1 week ago
On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 14.09.24 08:37, Barry Song wrote:
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > unclear if there's still interest in implementing a separate LRU
> > list for lazyfree folios, but I decided to explore it out of
> > curiosity.
> >
> > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > released earlier than file folios. One option, as implemented
> > by Gao Xu, is to place lazyfree anon folios at the tail of the
> > file's `min_seq` generation. However, this approach results in
> > lazyfree folios being released in a LIFO manner, which conflicts
> > with LRU behavior, as noted by Michal.
> >
> > To address this, this patch proposes maintaining a separate list
> > for lazyfree anon folios while keeping them classified under the
> > "file" LRU type to minimize code changes. These lazyfree anon
> > folios will still be counted as file folios and share the same
> > generation with regular files. In the eviction path, the lazyfree
> > list will be prioritized for scanning before the actual file
> > LRU list.
> >
>
> What's the downside of another LRU list? Do we have any experience on that?

Essentially, the goal is to address the downside of using a single LRU list
for both files and lazyfree anonymous pages, namely significantly more file
refaults.

I'm not entirely clear on the downsides of having an additional LRU list.
While it does increase complexity, the increase doesn't seem to be significant.

Let's wait for Gaoxu's test results before deciding on the next steps. I was
just curious about how difficult it would be to add a separate list, so I took
two hours to explore it :-)

>
> --
> Cheers,
>
> David / dhildenb
>

Thanks
Barry
回复: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by gaoxu 1 month, 2 weeks ago
> 
> On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com>
> wrote:
> >
> > On 14.09.24 08:37, Barry Song wrote:
> > > From: Barry Song <v-songbaohua@oppo.com>
> > >
> > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > unclear if there's still interest in implementing a separate LRU
> > > list for lazyfree folios, but I decided to explore it out of
> > > curiosity.
> > >
> > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > released earlier than file folios. One option, as implemented by Gao
> > > Xu, is to place lazyfree anon folios at the tail of the file's
> > > `min_seq` generation. However, this approach results in lazyfree
> > > folios being released in a LIFO manner, which conflicts with LRU
> > > behavior, as noted by Michal.
> > >
> > > To address this, this patch proposes maintaining a separate list for
> > > lazyfree anon folios while keeping them classified under the "file"
> > > LRU type to minimize code changes. These lazyfree anon folios will
> > > still be counted as file folios and share the same generation with
> > > regular files. In the eviction path, the lazyfree list will be
> > > prioritized for scanning before the actual file LRU list.
> > >
> >
> > What's the downside of another LRU list? Do we have any experience on that?
> 
> Essentially, the goal is to address the downsides of using a single LRU list for files
> and lazyfree anonymous pages - seriously more files re-faults.
> 
> I'm not entirely clear on the downsides of having an additional LRU list. While it
> does increase complexity, it doesn't seem to be significant.
> 
> Let's wait for Gaoxu's test results before deciding on the next steps.
> I was just
> curious about how difficult it would be to add a separate list, so I took two hours
> to explore it :-)
Hi Song,
I'm very sorry; a combination of reasons delayed these results.

Base version: Android V (with Android ART's use of MADV_FREE enabled)
Test case: 60 apps repeatedly restarted over 8 hours.
The test results are as follows:
        workingset_refault_anon   workingset_refault_file
base        42016805                92010542
patch       19834873                49383572
% diff       -52.79%                  -46.33%

Additionally, a comparative test was conducted on
add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
               workingset_refault_anon   workingset_refault_file
lazyfree-tail        20313395                 52203061
patch             19834873                 49383572
% diff              -2.36%                    -5.40%

From the results, it can be seen that this patch is very beneficial and
performs better than the approach in [1]; it can solve the performance issue
of high IO caused by extensive use of MADV_FREE on the Android platform.

Test case notes: The results reported in [1] differ from the current results
because the test cases are different. The test case used in [1] involves
actions such as clicking and swiping within the app after it starts. For the
sake of convenience and result stability, the current test case only involves
app startup, without clicking and swiping, and the number of apps has been
increased (30->60).

1. https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com/T/#u
> 
> >
> > --
> > Cheers,
> >
> > David / dhildenb
> >
> 
> Thanks
> Barry
Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by Barry Song 1 month, 2 weeks ago
On Tue, Oct 15, 2024 at 11:03 PM gaoxu <gaoxu2@honor.com> wrote:
>
> >
> > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand <david@redhat.com>
> > wrote:
> > >
> > > On 14.09.24 08:37, Barry Song wrote:
> > > > From: Barry Song <v-songbaohua@oppo.com>
> > > >
> > > > This follows up on the discussion regarding Gaoxu's work[1]. It's
> > > > unclear if there's still interest in implementing a separate LRU
> > > > list for lazyfree folios, but I decided to explore it out of
> > > > curiosity.
> > > >
> > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > released earlier than file folios. One option, as implemented by Gao
> > > > Xu, is to place lazyfree anon folios at the tail of the file's
> > > > `min_seq` generation. However, this approach results in lazyfree
> > > > folios being released in a LIFO manner, which conflicts with LRU
> > > > behavior, as noted by Michal.
> > > >
> > > > To address this, this patch proposes maintaining a separate list for
> > > > lazyfree anon folios while keeping them classified under the "file"
> > > > LRU type to minimize code changes. These lazyfree anon folios will
> > > > still be counted as file folios and share the same generation with
> > > > regular files. In the eviction path, the lazyfree list will be
> > > > prioritized for scanning before the actual file LRU list.
> > > >
> > >
> > > What's the downside of another LRU list? Do we have any experience on that?
> >
> > Essentially, the goal is to address the downsides of using a single LRU list for files
> > and lazyfree anonymous pages - seriously more files re-faults.
> >
> > I'm not entirely clear on the downsides of having an additional LRU list. While it
> > does increase complexity, it doesn't seem to be significant.
> >
> > Let's wait for Gaoxu's test results before deciding on the next steps.
> > I was just
> > curious about how difficult it would be to add a separate list, so I took two hours
> > to explore it :-)
> Hi song,
> I'm very sorry, various reasons combined have caused the delay in the results.
>
> Basic version:android V (enable Android ART use MADV_FREE)
> Test cases: 60 apps repeatedly restarted, tested for 8 hours;
> The test results are as follows:
>         workingset_refault_anon   workingset_refault_file
> base        42016805                92010542
> patch       19834873                49383572
> % diff       -52.79%                  -46.33%
>
> Additionally, a comparative test was conducted on
> add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
>                workingset_refault_anon   workingset_refault_file
> lazyfree-tail        20313395                 52203061
> patch             19834873                 49383572
> % diff              -2.36%                    -5.40%
>
> From the results, it can be seen that this patch is very beneficial and
> better than the results in [1]; it can solve the performance issue of high
> IO caused by extensive use of MADV_FREE on the Android platform.
>

Thank you for the testing and data. The results look promising. Would you
mind if I send a v2 with the test data and your tag included in the changelog?
I mean:

Tested-by: Gao Xu <gaoxu2@hihonor.com>

> Test case notes: There is a discrepancy between the test results mentioned in
> [1] and the current test results because the test cases are different. The test
> case used in [1] involves actions such as clicking and swiping within the app
> after it starts; For the sake of convenience and result stability, the current
> test case only involves app startup without clicking and swiping, and the number
> of apps has been increased (30->60).
>
> 1. https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com/T/#u
> >
> > >
> > > --
> > > Cheers,
> > >
> > > David / dhildenb
> > >
> >

Thanks
Barry
回复: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by gaoxu 1 month, 2 weeks ago

> -----邮件原件-----
> 发件人: Barry Song <21cnbao@gmail.com>
> 发送时间: 2024年10月16日 4:10
> 收件人: gaoxu <gaoxu2@honor.com>
> 抄送: David Hildenbrand <david@redhat.com>; akpm@linux-foundation.org;
> linux-mm@kvack.org; mhocko@suse.com; hailong.liu@oppo.com;
> kaleshsingh@google.com; linux-kernel@vger.kernel.org;
> lokeshgidra@google.com; ngeoffray@google.com; shli@fb.com;
> surenb@google.com; yuzhao@google.com; minchan@kernel.org; Barry Song
> <v-songbaohua@oppo.com>
> 主题: Re: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
> 
> On Tue, Oct 15, 2024 at 11:03 PM gaoxu <gaoxu2@honor.com> wrote:
> >
> > >
> > > On Wed, Sep 18, 2024 at 12:02 AM David Hildenbrand
> > > <david@redhat.com>
> > > wrote:
> > > >
> > > > On 14.09.24 08:37, Barry Song wrote:
> > > > > From: Barry Song <v-songbaohua@oppo.com>
> > > > >
> > > > > This follows up on the discussion regarding Gaoxu's work[1].
> > > > > It's unclear if there's still interest in implementing a
> > > > > separate LRU list for lazyfree folios, but I decided to explore
> > > > > it out of curiosity.
> > > > >
> > > > > According to Lokesh, MADV_FREE'd anon folios are expected to be
> > > > > released earlier than file folios. One option, as implemented by
> > > > > Gao Xu, is to place lazyfree anon folios at the tail of the
> > > > > file's `min_seq` generation. However, this approach results in
> > > > > lazyfree folios being released in a LIFO manner, which conflicts
> > > > > with LRU behavior, as noted by Michal.
> > > > >
> > > > > To address this, this patch proposes maintaining a separate list
> > > > > for lazyfree anon folios while keeping them classified under the "file"
> > > > > LRU type to minimize code changes. These lazyfree anon folios
> > > > > will still be counted as file folios and share the same
> > > > > generation with regular files. In the eviction path, the
> > > > > lazyfree list will be prioritized for scanning before the actual file LRU list.
> > > > >
> > > >
> > > > What's the downside of another LRU list? Do we have any experience on
> that?
> > >
> > > Essentially, the goal is to address the downsides of using a single
> > > LRU list for files and lazyfree anonymous pages - seriously more files re-faults.
> > >
> > > I'm not entirely clear on the downsides of having an additional LRU
> > > list. While it does increase complexity, it doesn't seem to be significant.
> > >
> > > Let's wait for Gaoxu's test results before deciding on the next steps.
> > > I was just
> > > curious about how difficult it would be to add a separate list, so I
> > > took two hours to explore it :-)
> > Hi song,
> > I'm very sorry, various reasons combined have caused the delay in the results.
> >
> > Basic version:android V (enable Android ART use MADV_FREE) Test cases:
> > 60 apps repeatedly restarted, tested for 8 hours; The test results are
> > as follows:
> >         workingset_refault_anon   workingset_refault_file
> > base        42016805                92010542
> > patch       19834873                49383572
> > % diff       -52.79%                  -46.33%
> >
> > Additionally, a comparative test was conducted on
> > add-lazyfree-folio-to-lru-tail.patch[1], and the results are as follows:
> >                workingset_refault_anon   workingset_refault_file
> > lazyfree-tail        20313395                 52203061
> > patch             19834873                 49383572
> > % diff              -2.36%                    -5.40%
> >
> > From the results, it can be seen that this patch is very beneficial
> > and better than the results in [1]; it can solve the performance issue
> > of high IO caused by extensive use of MADV_FREE on the Android platform.
> >
> 
> Thank you for the testing and data. The results look promising. Would you mind
> if I send a v2 with the test data and your tag included in the changelog?
> I mean:
> 
> Tested-by: Gao Xu <gaoxu2@hihonor.com>
Of course not, I'd be happy to.

Due to IO performance issues, Android has reverted ART's use of MADV_FREE;
once the issue is resolved, Android ART is expected to re-enable MADV_FREE
and to promote its use in more modules.
> 
> > Test case notes: There is a discrepancy between the test results
> > mentioned in [1] and the current test results because the test cases
> > are different. The test case used in [1] involves actions such as
> > clicking and swiping within the app after it starts; For the sake of
> > convenience and result stability, the current test case only involves
> > app startup without clicking and swiping, and the number of apps has been
> increased (30->60).
> >
> > 1.
> > https://lore.kernel.org/all/f29f64e29c08427b95e3df30a5770056@honor.com
> > /T/#u
> > >
> > > >
> > > > --
> > > > Cheers,
> > > >
> > > > David / dhildenb
> > > >
> > >
> 
> Thanks
> Barry
RE: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by wang wei 2 months, 2 weeks ago
> -----Original Message-----
> From: linux-kernel+bounces-329184-a929244872=163.com@vger.kernel.org
> <linux-kernel+bounces-329184-a929244872=163.com@vger.kernel.org> On
> Behalf Of Barry Song
> Sent: Saturday, September 14, 2024 2:38 PM
> To: akpm@linux-foundation.org; linux-mm@kvack.org
> Cc: mhocko@suse.com; fengbaopeng@honor.com; gaoxu2@honor.com;
> hailong.liu@oppo.com; kaleshsingh@google.com; linux-
> kernel@vger.kernel.org; lokeshgidra@google.com; ngeoffray@google.com;
> shli@fb.com; surenb@google.com; yipengxiang@honor.com;
> david@redhat.com; yuzhao@google.com; minchan@kernel.org; Barry Song
> <v-songbaohua@oppo.com>
> Subject: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon
> folios
> 
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This follows up on the discussion regarding Gaoxu's work[1]. It's unclear
if
> there's still interest in implementing a separate LRU list for lazyfree
folios, but I
> decided to explore it out of curiosity.
> 
> According to Lokesh, MADV_FREE'd anon folios are expected to be released
> earlier than file folios. One option, as implemented by Gao Xu, is to
place
> lazyfree anon folios at the tail of the file's `min_seq` generation.
However, this
> approach results in lazyfree folios being released in a LIFO manner, which
> conflicts with LRU behavior, as noted by Michal.
> 
> To address this, this patch proposes maintaining a separate list for
lazyfree
> anon folios while keeping them classified under the "file" LRU type to
minimize
> code changes. These lazyfree anon folios will still be counted as file
folios and
> share the same generation with regular files. In the eviction path, the
lazyfree
> list will be prioritized for scanning before the actual file LRU list.
> 
> [1] https://lore.kernel.org/linux-
> mm/f29f64e29c08427b95e3df30a5770056@honor.com/
> 
> Signed-off-by: Barry Song <v-songbaohua@oppo.com>
> ---
>  include/linux/mm_inline.h |  5 +-
>  include/linux/mmzone.h    |  2 +-
>  mm/vmscan.c               | 97 +++++++++++++++++++++++----------------
>  3 files changed, 61 insertions(+), 43 deletions(-)
> 
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index
> f4fe593c1400..118d70ed3120 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
>  	int gen = folio_lru_gen(folio);
>  	int type = folio_is_file_lru(folio);
>  	int zone = folio_zonenum(folio);
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); @@ -262,9 +263,9
> @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct
folio
> *folio,
>  	lru_gen_update_size(lruvec, folio, -1, gen);
>  	/* for folio_rotate_reclaimable() */
>  	if (reclaiming)
> -		list_add_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_add_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
>  	else
> -		list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_add(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
> 
>  	return true;
>  }
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index
> 17506e4a2835..5d2331778528 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>  	/* the birth time of each generation in jiffies */
>  	unsigned long timestamps[MAX_NR_GENS];
>  	/* the multi-gen LRU lists, lazily sorted on eviction */
> -	struct list_head
> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> 1][MAX_NR_ZONES];
This also divides lazyfree folios into MAX_NR_GENS generations.
The gen of a lazyfree folio depends on its gen in the anon list before
it was marked as lazyfree. Could it happen that lazyfree folios are
released in an order that is inconsistent with the order in which they
were marked?

>  	/* the multi-gen LRU sizes, eventually consistent */
>  	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>  	/* the exponential moving average of refaulted */ diff --git
> a/mm/vmscan.c b/mm/vmscan.c index 96abf4a52382..9dc665dc6ba9
> 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> type, bool can_swap)
> 
>  	/* prevent cold/hot inversion if force_scan is true */
>  	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> -		struct list_head *head = &lrugen-
> >folios[old_gen][type][zone];
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
> 
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> +		for (int i = list_num - 1; i >= 0; i--) {
> +			head = &lrugen->folios[old_gen][type + i][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> 
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio),
> folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> 
> -			new_gen = folio_inc_gen(lruvec, folio, false);
> -			list_move_tail(&folio->lru, &lrugen-
> >folios[new_gen][type][zone]);
> +				new_gen = folio_inc_gen(lruvec, folio,
false);
> +				list_move_tail(&folio->lru, &lrugen-
> >folios[new_gen][type +
> +i][zone]);
> 
> -			if (!--remaining)
> -				return false;
> +				if (!--remaining)
> +					return false;
> +			}
>  		}
>  	}
>  done:
> @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  	int refs = folio_lru_refs(folio);
>  	int tier = lru_tier_from_refs(refs);
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
> +	int lazyfree = type ? folio_test_anon(folio) : 0;
> 
>  	VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
> 
> @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
> 
>  	/* promoted */
>  	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
> 
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
> 
>  		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
>  			   lrugen->protected[hist][type][tier - 1] + delta);
@@ -
> 4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
folio
> *folio, struct scan_c
>  	/* ineligible */
>  	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
>  		gen = folio_inc_gen(lruvec, folio, false);
> -		list_move_tail(&folio->lru,
&lrugen->folios[gen][type][zone]);
> +		list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> +lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct
> folio *folio, struct scan_c
>  	if (folio_test_locked(folio) || folio_test_writeback(folio) ||
>  	    (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
>  		gen = folio_inc_gen(lruvec, folio, true);
> -		list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +		list_move(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>  		return true;
>  	}
> 
> @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec,
struct
> folio *folio, struct sca  static int scan_folios(struct lruvec *lruvec,
struct
> scan_control *sc,
>  		       int type, int tier, struct list_head *list)  {
> -	int i;
> +	int i, j;
>  	int gen;
>  	enum vm_event_item item;
>  	int sorted = 0;
> @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec,
struct
> scan_control *sc,
>  		LIST_HEAD(moved);
>  		int skipped_zone = 0;
>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> -		struct list_head *head = &lrugen->folios[gen][type][zone];
> -
> -		while (!list_empty(head)) {
> -			struct folio *folio = lru_to_folio(head);
> -			int delta = folio_nr_pages(folio);
> -
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> -			VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio),
> folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> -
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> -
> -			scanned += delta;
> +		int list_num = type ? 2 : 1;
> +		struct list_head *head;
In addition, scan_folios() will also age the lazyfree list. Is this necessary?
> +
> +		for (j = list_num - 1; j >= 0; j--) {
> +			head = &lrugen->folios[gen][type + j][zone];
> +			while (!list_empty(head)) {
> +				struct folio *folio = lru_to_folio(head);
> +				int delta = folio_nr_pages(folio);
> +
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> +
> 	VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
> +
> +				scanned += delta;
> +
> +				if (sort_folio(lruvec, folio, sc, tier))
> +					sorted += delta;
> +				else if (isolate_folio(lruvec, folio, sc)) {
> +					list_add(&folio->lru, list);
> +					isolated += delta;
> +				} else {
> +					list_move(&folio->lru, &moved);
> +					skipped_zone += delta;
> +				}
> 
> -			if (sort_folio(lruvec, folio, sc, tier))
> -				sorted += delta;
> -			else if (isolate_folio(lruvec, folio, sc)) {
> -				list_add(&folio->lru, list);
> -				isolated += delta;
> -			} else {
> -				list_move(&folio->lru, &moved);
> -				skipped_zone += delta;
> +				if (!--remaining || max(isolated,
> skipped_zone) >= MIN_LRU_BATCH)
> +					goto isolate_done;
>  			}
> -
> -			if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> -				break;
>  		}
> 
> +isolate_done:
>  		if (skipped_zone) {
>  			list_splice(&moved, head);
>  			__count_zid_vm_events(PGSCAN_SKIP, zone,
> skipped_zone); @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct
> lruvec *lruvec)
>  	for (i = 0; i <= MIN_NR_GENS + 1; i++)
>  		lrugen->timestamps[i] = jiffies;
> 
> -	for_each_gen_type_zone(gen, type, zone)
> +	for_each_gen_type_zone(gen, type, zone) {
>  		INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> +		/*
> +		 * lazyfree anon folios have a separate list while using
> +		 * file as type
> +		 */
> +		if (type)
> +			INIT_LIST_HEAD(&lrugen->folios[gen][type +
> 1][zone]);
> +	}
> 
>  	if (mm_state)
>  		mm_state->seq = MIN_NR_GENS;
> --
> 2.39.3 (Apple Git-146)
RE: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
Posted by Barry Song 1 month, 2 weeks ago
>> +++ b/include/linux/mmzone.h
>> @@ -434,7 +434,7 @@ struct lru_gen_folio {
>>  	/* the birth time of each generation in jiffies */
>>  	unsigned long timestamps[MAX_NR_GENS];
>>  	/* the multi-gen LRU lists, lazily sorted on eviction */
>> -	struct list_head
>> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
>> +	struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
>> 1][MAX_NR_ZONES];
> This also divides lazyfree folios into MAX_NR_ZONES generations.
> The gen of a lazyfree folio depends on its gen in the anon list before
> it was marked as lazyfree. Could lazyfree folios end up being released
> in an order that is inconsistent with the order in which they were marked?

No, this separate list ensures that lazyfree folios are released in the
same order in which they were marked as lazyfree. Note that any newly
marked lazyfree folio is always placed in the generation most likely to
be reclaimed, regardless of the list.

/*
 * Mark @folio as lazyfree on @lruvec.
 *
 * The folio is removed from @lruvec, its active, referenced and swapbacked
 * flags are cleared, it is added back to @lruvec, and the PGLAZYFREE counters
 * are bumped by the folio's page count.
 *
 * Nothing is done unless the folio is anonymous and swapbacked, and is neither
 * in the swap cache nor unevictable.
 */
static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio)
{
        /* Sampled up front; used only for the event counters at the end. */
        long nr_pages = folio_nr_pages(folio);

        /* Only swapbacked anon folios outside the swap cache qualify. */
        if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
            folio_test_swapcache(folio) || folio_test_unevictable(folio))
                return;

        /*
         * The flag changes happen between the delete and the re-add.
         * NOTE(review): presumably so that lruvec_add_folio() chooses the
         * destination list from the updated flags -- confirm against
         * lruvec_add_folio().
         */
        lruvec_del_folio(lruvec, folio);
        folio_clear_active(folio);
        folio_clear_referenced(folio);
        /*
         * Lazyfree folios are clean anonymous folios.  They have
         * the swapbacked flag cleared, to distinguish them from normal
         * anonymous folios
         */
        folio_clear_swapbacked(folio);
        lruvec_add_folio(lruvec, folio);

        /* Account the event both globally and for the lruvec's memcg. */
        __count_vm_events(PGLAZYFREE, nr_pages);
        __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages);
}

Thanks
Barry