[PATCH v2] mm/mglru: reset page lru tier bits when activating

Wei Xu posted 1 patch 1 month, 1 week ago
include/linux/mm_inline.h | 15 ++++++++++++++-
include/linux/mmzone.h    |  2 ++
mm/vmscan.c               |  8 ++++----
3 files changed, 20 insertions(+), 5 deletions(-)
[PATCH v2] mm/mglru: reset page lru tier bits when activating
Posted by Wei Xu 1 month, 1 week ago
When a folio is activated, lru_gen_add_folio() moves the folio to the
youngest generation.  But unlike folio_update_gen()/folio_inc_gen(),
lru_gen_add_folio() doesn't reset the folio's LRU tier bits
(LRU_REFS_MASK | LRU_REFS_FLAGS).  This inconsistency can affect how
pages are aged via folio_mark_accessed() (e.g. fd accesses), though
no user-visible impact related to this has been detected yet.

Note that lru_gen_add_folio() cannot clear PG_workingset if the
activation is due to a workingset refault; otherwise PSI accounting
will be skipped.  So fix lru_gen_add_folio() to clear the LRU tier
bits other than PG_workingset when activating a folio, and also
clear all the LRU tier bits when a folio is activated via
folio_activate() in lru_gen_look_around().

Fixes: 018ee47f1489 ("mm: multi-gen LRU: exploit locality in rmap")
Signed-off-by: Wei Xu <weixugc@google.com>
---
 include/linux/mm_inline.h | 15 ++++++++++++++-
 include/linux/mmzone.h    |  2 ++
 mm/vmscan.c               |  8 ++++----
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 6f801c7b36e2..355cf46a01a6 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio)
 	return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
 }
 
+static inline void folio_clear_lru_refs(struct folio *folio)
+{
+	set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
+}
+
 static inline int folio_lru_gen(struct folio *folio)
 {
 	unsigned long flags = READ_ONCE(folio->flags);
@@ -222,6 +227,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 {
 	unsigned long seq;
 	unsigned long flags;
+	unsigned long mask;
 	int gen = folio_lru_gen(folio);
 	int type = folio_is_file_lru(folio);
 	int zone = folio_zonenum(folio);
@@ -257,7 +263,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
 	gen = lru_gen_from_seq(seq);
 	flags = (gen + 1UL) << LRU_GEN_PGOFF;
 	/* see the comment on MIN_NR_GENS about PG_active */
-	set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+	mask = LRU_GEN_MASK;
+	/*
+	 * Don't clear PG_workingset here because it can affect PSI accounting
+	 * if the activation is due to workingset refault.
+	 */
+	if (folio_test_active(folio))
+		mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active);
+	set_mask_bits(&folio->flags, mask, flags);
 
 	lru_gen_update_size(lruvec, folio, -1, gen);
 	/* for folio_rotate_reclaimable() */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4a2835..96dea31fb211 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -403,6 +403,8 @@ enum {
 	NR_LRU_GEN_CAPS
 };
 
+#define LRU_REFS_FLAGS		(BIT(PG_referenced) | BIT(PG_workingset))
+
 #define MIN_LRU_BATCH		BITS_PER_LONG
 #define MAX_LRU_BATCH		(MIN_LRU_BATCH * 64)
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c12f78b042f3..2d0486189804 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2602,8 +2602,6 @@ static bool should_clear_pmd_young(void)
  *                          shorthand helpers
  ******************************************************************************/
 
-#define LRU_REFS_FLAGS	(BIT(PG_referenced) | BIT(PG_workingset))
-
 #define DEFINE_MAX_SEQ(lruvec)						\
 	unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
 
@@ -4138,8 +4136,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 		old_gen = folio_lru_gen(folio);
 		if (old_gen < 0)
 			folio_set_referenced(folio);
-		else if (old_gen != new_gen)
+		else if (old_gen != new_gen) {
+			folio_clear_lru_refs(folio);
 			folio_activate(folio);
+		}
 	}
 
 	arch_leave_lazy_mmu_mode();
@@ -4370,7 +4370,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
 
 	/* see the comment on MAX_NR_TIERS */
 	if (!folio_test_referenced(folio))
-		set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
+		folio_clear_lru_refs(folio);
 
 	/* for shrink_folio_list() */
 	folio_clear_reclaim(folio);
-- 
2.47.0.rc1.288.g06298d1525-goog
Re: [PATCH v2] mm/mglru: reset page lru tier bits when activating
Posted by Wei Xu 1 month, 1 week ago
Changelog since v1:
- Addressed Yu Zhao's comments on not clearing PG_workingset.
- Updated the commit message.

On Thu, Oct 17, 2024 at 11:15 AM Wei Xu <weixugc@google.com> wrote:
>
> When a folio is activated, lru_gen_add_folio() moves the folio to the
> youngest generation.  But unlike folio_update_gen()/folio_inc_gen(),
> lru_gen_add_folio() doesn't reset the folio lru tier bits
> (LRU_REFS_MASK | LRU_REFS_FLAGS).  This inconsistency can affect how
> pages are aged via folio_mark_accessed() (e.g. fd accesses), though
> no user visible impact related to this has been detected yet.
>
> Note that lru_gen_add_folio() cannot clear PG_workingset if the
> activation is due to workingset refault, otherwise PSI accounting
> will be skipped.  So fix lru_gen_add_folio() to clear the lru tier
> bits other than PG_workingset when activating a folio, and also
> clear all the lru tier bits when a folio is activated via
> folio_activate() in lru_gen_look_around().
>
> Fixes: 018ee47f1489 ("mm: multi-gen LRU: exploit locality in rmap")
> Signed-off-by: Wei Xu <weixugc@google.com>
> ---
>  include/linux/mm_inline.h | 15 ++++++++++++++-
>  include/linux/mmzone.h    |  2 ++
>  mm/vmscan.c               |  8 ++++----
>  3 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index 6f801c7b36e2..355cf46a01a6 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio)
>         return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
>  }
>
> +static inline void folio_clear_lru_refs(struct folio *folio)
> +{
> +       set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
> +}
> +
>  static inline int folio_lru_gen(struct folio *folio)
>  {
>         unsigned long flags = READ_ONCE(folio->flags);
> @@ -222,6 +227,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
>  {
>         unsigned long seq;
>         unsigned long flags;
> +       unsigned long mask;
>         int gen = folio_lru_gen(folio);
>         int type = folio_is_file_lru(folio);
>         int zone = folio_zonenum(folio);
> @@ -257,7 +263,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
>         gen = lru_gen_from_seq(seq);
>         flags = (gen + 1UL) << LRU_GEN_PGOFF;
>         /* see the comment on MIN_NR_GENS about PG_active */
> -       set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
> +       mask = LRU_GEN_MASK;
> +       /*
> +        * Don't clear PG_workingset here because it can affect PSI accounting
> +        * if the activation is due to workingset refault.
> +        */
> +       if (folio_test_active(folio))
> +               mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active);
> +       set_mask_bits(&folio->flags, mask, flags);
>
>         lru_gen_update_size(lruvec, folio, -1, gen);
>         /* for folio_rotate_reclaimable() */
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4a2835..96dea31fb211 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -403,6 +403,8 @@ enum {
>         NR_LRU_GEN_CAPS
>  };
>
> +#define LRU_REFS_FLAGS         (BIT(PG_referenced) | BIT(PG_workingset))
> +
>  #define MIN_LRU_BATCH          BITS_PER_LONG
>  #define MAX_LRU_BATCH          (MIN_LRU_BATCH * 64)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index c12f78b042f3..2d0486189804 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2602,8 +2602,6 @@ static bool should_clear_pmd_young(void)
>   *                          shorthand helpers
>   ******************************************************************************/
>
> -#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset))
> -
>  #define DEFINE_MAX_SEQ(lruvec)                                         \
>         unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
>
> @@ -4138,8 +4136,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
>                 old_gen = folio_lru_gen(folio);
>                 if (old_gen < 0)
>                         folio_set_referenced(folio);
> -               else if (old_gen != new_gen)
> +               else if (old_gen != new_gen) {
> +                       folio_clear_lru_refs(folio);
>                         folio_activate(folio);
> +               }
>         }
>
>         arch_leave_lazy_mmu_mode();
> @@ -4370,7 +4370,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
>
>         /* see the comment on MAX_NR_TIERS */
>         if (!folio_test_referenced(folio))
> -               set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
> +               folio_clear_lru_refs(folio);
>
>         /* for shrink_folio_list() */
>         folio_clear_reclaim(folio);
> --
> 2.47.0.rc1.288.g06298d1525-goog
>