From: Chen Ridong <chenridong@huawei.com>
Currently, flush_reclaim_state is placed differently between
shrink_node_memcgs and shrink_many. shrink_many (only used for gen-LRU)
calls it after each lruvec is shrunk, while shrink_node_memcgs calls it
only after all lruvecs have been shrunk.
This patch moves flush_reclaim_state into shrink_node_memcgs and calls it
after each lruvec. This unifies the behavior and is reasonable because:
1. flush_reclaim_state adds current->reclaim_state->reclaimed to
sc->nr_reclaimed.
2. For non-MGLRU root reclaim, this can help stop the iteration earlier
when nr_to_reclaim is reached.
3. For non-root reclaim, the effect is negligible since flush_reclaim_state
does nothing in that case.
After moving flush_reclaim_state into shrink_node_memcgs, shrink_one can be
extended to support both lrugen and non-lrugen paths. It will call
try_to_shrink_lruvec for lrugen root reclaim and shrink_lruvec otherwise.
Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
mm/vmscan.c | 57 +++++++++++++++++++++--------------------------------
1 file changed, 23 insertions(+), 34 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 584f41eb4c14..795f5ebd9341 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4758,23 +4758,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
return nr_to_scan < 0;
}
-static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
-{
- unsigned long scanned = sc->nr_scanned;
- unsigned long reclaimed = sc->nr_reclaimed;
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
-
- try_to_shrink_lruvec(lruvec, sc);
-
- shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
-
- if (!sc->proactive)
- vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
-
- flush_reclaim_state(sc);
-}
+static void shrink_one(struct lruvec *lruvec, struct scan_control *sc);
static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
{
@@ -5760,6 +5744,27 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
return inactive_lru_pages > pages_for_compaction;
}
+static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
+{
+ unsigned long scanned = sc->nr_scanned;
+ unsigned long reclaimed = sc->nr_reclaimed;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+
+ if (lru_gen_enabled() && root_reclaim(sc))
+ try_to_shrink_lruvec(lruvec, sc);
+ else
+ shrink_lruvec(lruvec, sc);
+
+ shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
+
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);
+
+ flush_reclaim_state(sc);
+}
+
static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
{
struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
@@ -5784,8 +5789,6 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
memcg = mem_cgroup_iter(target_memcg, NULL, partial);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
- unsigned long reclaimed;
- unsigned long scanned;
/*
* This loop can become CPU-bound when target memcgs
@@ -5817,19 +5820,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
memcg_memory_event(memcg, MEMCG_LOW);
}
- reclaimed = sc->nr_reclaimed;
- scanned = sc->nr_scanned;
-
- shrink_lruvec(lruvec, sc);
-
- shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
- sc->priority);
-
- /* Record the group's reclaim efficiency */
- if (!sc->proactive)
- vmpressure(sc->gfp_mask, memcg, false,
- sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
+ shrink_one(lruvec, sc);
/* If partial walks are allowed, bail once goal is reached */
if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
@@ -5863,8 +5854,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
shrink_node_memcgs(pgdat, sc);
- flush_reclaim_state(sc);
-
nr_node_reclaimed = sc->nr_reclaimed - nr_reclaimed;
/* Record the subtree's reclaim efficiency */
--
2.34.1
On Tue, Dec 09, 2025 at 01:25:55AM +0000, Chen Ridong wrote:
> From: Chen Ridong <chenridong@huawei.com>
>
> Currently, flush_reclaim_state is placed differently between
> shrink_node_memcgs and shrink_many. shrink_many (only used for gen-LRU)
> calls it after each lruvec is shrunk, while shrink_node_memcgs calls it
> only after all lruvecs have been shrunk.
>
> This patch moves flush_reclaim_state into shrink_node_memcgs and calls it
> after each lruvec. This unifies the behavior and is reasonable because:
>
> 1. flush_reclaim_state adds current->reclaim_state->reclaimed to
> sc->nr_reclaimed.
> 2. For non-MGLRU root reclaim, this can help stop the iteration earlier
> when nr_to_reclaim is reached.
> 3. For non-root reclaim, the effect is negligible since flush_reclaim_state
> does nothing in that case.
>
> After moving flush_reclaim_state into shrink_node_memcgs, shrink_one can be
> extended to support both lrugen and non-lrugen paths. It will call
> try_to_shrink_lruvec for lrugen root reclaim and shrink_lruvec otherwise.
>
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> ---
> mm/vmscan.c | 57 +++++++++++++++++++++--------------------------------
> 1 file changed, 23 insertions(+), 34 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 584f41eb4c14..795f5ebd9341 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4758,23 +4758,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
> return nr_to_scan < 0;
> }
>
> -static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
> -{
> - unsigned long scanned = sc->nr_scanned;
> - unsigned long reclaimed = sc->nr_reclaimed;
> - struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> - struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> -
> - try_to_shrink_lruvec(lruvec, sc);
> -
> - shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
> -
> - if (!sc->proactive)
> - vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
> - sc->nr_reclaimed - reclaimed);
> -
> - flush_reclaim_state(sc);
> -}
> +static void shrink_one(struct lruvec *lruvec, struct scan_control *sc);
>
> static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
> {
> @@ -5760,6 +5744,27 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
> return inactive_lru_pages > pages_for_compaction;
> }
>
> +static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
> +{
> + unsigned long scanned = sc->nr_scanned;
> + unsigned long reclaimed = sc->nr_reclaimed;
> + struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> +
> + if (lru_gen_enabled() && root_reclaim(sc))
> + try_to_shrink_lruvec(lruvec, sc);
> + else
> + shrink_lruvec(lruvec, sc);
Yikes. So we end up with:
shrink_node_memcgs()
shrink_one()
if lru_gen_enabled && root_reclaim(sc)
try_to_shrink_lruvec(lruvec, sc)
else
shrink_lruvec()
if lru_gen_enabled && !root_reclaim(sc)
lru_gen_shrink_lruvec(lruvec, sc)
try_to_shrink_lruvec()
I think it's doing too much at once. Can you get it into the following
shape:
shrink_node_memcgs()
for each memcg:
if lru_gen_enabled:
lru_gen_shrink_lruvec()
else
shrink_lruvec()
and handle the differences in those two functions? Then look for
overlap one level down, and so forth.
On 2025/12/16 5:13, Johannes Weiner wrote:
> On Tue, Dec 09, 2025 at 01:25:55AM +0000, Chen Ridong wrote:
>> From: Chen Ridong <chenridong@huawei.com>
>>
>> Currently, flush_reclaim_state is placed differently between
>> shrink_node_memcgs and shrink_many. shrink_many (only used for gen-LRU)
>> calls it after each lruvec is shrunk, while shrink_node_memcgs calls it
>> only after all lruvecs have been shrunk.
>>
>> This patch moves flush_reclaim_state into shrink_node_memcgs and calls it
>> after each lruvec. This unifies the behavior and is reasonable because:
>>
>> 1. flush_reclaim_state adds current->reclaim_state->reclaimed to
>> sc->nr_reclaimed.
>> 2. For non-MGLRU root reclaim, this can help stop the iteration earlier
>> when nr_to_reclaim is reached.
>> 3. For non-root reclaim, the effect is negligible since flush_reclaim_state
>> does nothing in that case.
>>
>> After moving flush_reclaim_state into shrink_node_memcgs, shrink_one can be
>> extended to support both lrugen and non-lrugen paths. It will call
>> try_to_shrink_lruvec for lrugen root reclaim and shrink_lruvec otherwise.
>>
>> Signed-off-by: Chen Ridong <chenridong@huawei.com>
>> ---
>> mm/vmscan.c | 57 +++++++++++++++++++++--------------------------------
>> 1 file changed, 23 insertions(+), 34 deletions(-)
>>
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 584f41eb4c14..795f5ebd9341 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -4758,23 +4758,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>> return nr_to_scan < 0;
>> }
>>
>> -static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
>> -{
>> - unsigned long scanned = sc->nr_scanned;
>> - unsigned long reclaimed = sc->nr_reclaimed;
>> - struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>> - struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>> -
>> - try_to_shrink_lruvec(lruvec, sc);
>> -
>> - shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
>> -
>> - if (!sc->proactive)
>> - vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
>> - sc->nr_reclaimed - reclaimed);
>> -
>> - flush_reclaim_state(sc);
>> -}
>> +static void shrink_one(struct lruvec *lruvec, struct scan_control *sc);
>>
>> static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
>> {
>> @@ -5760,6 +5744,27 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
>> return inactive_lru_pages > pages_for_compaction;
>> }
>>
>> +static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
>> +{
>> + unsigned long scanned = sc->nr_scanned;
>> + unsigned long reclaimed = sc->nr_reclaimed;
>> + struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>> + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>> +
>> + if (lru_gen_enabled() && root_reclaim(sc))
>> + try_to_shrink_lruvec(lruvec, sc);
>> + else
>> + shrink_lruvec(lruvec, sc);
>
Hi Johannes, thank you for your reply.
> Yikes. So we end up with:
>
> shrink_node_memcgs()
> shrink_one()
> if lru_gen_enabled && root_reclaim(sc)
> try_to_shrink_lruvec(lruvec, sc)
> else
> shrink_lruvec()
> if lru_gen_enabled && !root_reclaim(sc)
> lru_gen_shrink_lruvec(lruvec, sc)
> try_to_shrink_lruvec()
>
> I think it's doing too much at once. Can you get it into the following
> shape:
>
You're absolutely right. This refactoring is indeed what patch 5/5 implements.
With patch 5/5 applied, the flow becomes:
shrink_node_memcgs()
shrink_one()
if lru_gen_enabled
lru_gen_shrink_lruvec --> symmetric with else shrink_lruvec()
if (root_reclaim(sc)) --> handle root reclaim.
try_to_shrink_lruvec()
else
...
try_to_shrink_lruvec()
else
shrink_lruvec()
This matches the structure you described.
One note: shrink_one() is also called from lru_gen_shrink_node() when memcg is disabled, so I
believe it makes sense to keep this helper.
> shrink_node_memcgs()
> for each memcg:
> if lru_gen_enabled:
> lru_gen_shrink_lruvec()
> else
> shrink_lruvec()
>
Regarding the patch split, I have currently kept patches 3/5 and 5/5 separate to make the changes
in each step clearer. Would you prefer that I merge patch 3/5 into patch 5/5, so that the full
refactoring appears in one patch?
Looking forward to your guidance.
> and handle the differences in those two functions? Then look for
> overlap one level down, and so forth.
--
Best regards,
Ridong
Hi Chen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Chen-Ridong/mm-mglru-use-mem_cgroup_iter-for-global-reclaim/20251209-094913
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20251209012557.1949239-4-chenridong%40huaweicloud.com
patch subject: [PATCH -next 3/5] mm/mglru: extend shrink_one for both lrugen and non-lrugen
config: x86_64-randconfig-004-20251212 (https://download.01.org/0day-ci/archive/20251212/202512121027.03z9qd08-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251212/202512121027.03z9qd08-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512121027.03z9qd08-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> mm/vmscan.o: warning: objtool: shrink_one+0xeb2: sibling call from callable instruction with modified stack frame

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 2025/12/12 10:55, kernel test robot wrote:
> Hi Chen,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on akpm-mm/mm-everything]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Chen-Ridong/mm-mglru-use-mem_cgroup_iter-for-global-reclaim/20251209-094913
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/r/20251209012557.1949239-4-chenridong%40huaweicloud.com
> patch subject: [PATCH -next 3/5] mm/mglru: extend shrink_one for both lrugen and non-lrugen
> config: x86_64-randconfig-004-20251212 (https://download.01.org/0day-ci/archive/20251212/202512121027.03z9qd08-lkp@intel.com/config)
> compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251212/202512121027.03z9qd08-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202512121027.03z9qd08-lkp@intel.com/
>
> All warnings (new ones prefixed by >>):
>
>>> mm/vmscan.o: warning: objtool: shrink_one+0xeb2: sibling call from callable instruction with modified stack frame
>
This is the first time I've encountered this warning. While adding
`STACK_FRAME_NON_STANDARD(shrink_one)` resolves it, I noticed this approach isn't widely used in the
codebase. Is this the standard solution, or are there better alternatives?
I've tested that the warning persists even when `shrink_one` is simplified to only call `shrink_lruvec`:
```
static void shrink_one(struct lruvec *lruvec, struct scan_control *sc)
{
shrink_lruvec(lruvec, sc);
}
```
How can we properly avoid this warning without using STACK_FRAME_NON_STANDARD?
--
Best regards,
Ridong
© 2016 - 2025 Red Hat, Inc.