From: Kairui Song <kasong@tencent.com>
Make the scan helpers return the exact number of folios being scanned
or isolated. This should make the scan more accurate and easier to
follow.
Now there is no more need for special handling when no progress is
made. The old livelock prevention (`return isolated || !remaining ?
scanned : 0`) is replaced by the natural scan budget exhaustion in
try_to_shrink_lruvec, and sort_folio moves ineligible folios to newer
generations.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/vmscan.c | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ed5b5f8dd3c7..4f4548ff3a17 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4680,7 +4680,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
struct scan_control *sc, int type, int tier,
- struct list_head *list)
+ struct list_head *list, int *isolatedp)
{
int i;
int gen;
@@ -4750,11 +4750,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
if (type == LRU_GEN_FILE)
sc->nr.file_taken += isolated;
- /*
- * There might not be eligible folios due to reclaim_idx. Check the
- * remaining to prevent livelock if it's not making progress.
- */
- return isolated || !remaining ? scanned : 0;
+
+ *isolatedp = isolated;
+ return scanned;
}
static int get_tier_idx(struct lruvec *lruvec, int type)
@@ -4819,23 +4817,24 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
int *type_scanned, struct list_head *list)
{
int i;
+ int scanned = 0;
+ int isolated = 0;
int type = get_type_to_scan(lruvec, swappiness);
for_each_evictable_type(i, swappiness) {
- int scanned;
int tier = get_tier_idx(lruvec, type);
*type_scanned = type;
- scanned = scan_folios(nr_to_scan, lruvec, sc,
- type, tier, list);
- if (scanned)
+ scanned += scan_folios(nr_to_scan, lruvec, sc,
+ type, tier, list, &isolated);
+ if (isolated)
return scanned;
type = !type;
}
- return 0;
+ return scanned;
}
static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
@@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
- struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
- scanned += try_to_inc_min_seq(lruvec, swappiness);
-
- if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
- scanned = 0;
+ try_to_inc_min_seq(lruvec, swappiness);
lruvec_unlock_irq(lruvec);
--
2.53.0
On Tue, Mar 17, 2026 at 12:11 PM Kairui Song via B4 Relay
<devnull+kasong.tencent.com@kernel.org> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> Make the scan helpers return the exact number of folios being scanned
> or isolated. This should make the scan more accurate and easier to
> follow.
>
> Now there is no more need for special handling when there is no
> progress made. The old livelock prevention `(return isolated ||
> !remaining ? scanned : 0)` is replaced by the natural scan budget
> exhaustion in try_to_shrink_lruvec, and sort_folio moves ineligible
> folios to newer generations.
>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
> mm/vmscan.c | 27 +++++++++++----------------
> 1 file changed, 11 insertions(+), 16 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ed5b5f8dd3c7..4f4548ff3a17 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4680,7 +4680,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
>
> static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> struct scan_control *sc, int type, int tier,
> - struct list_head *list)
> + struct list_head *list, int *isolatedp)
> {
> int i;
> int gen;
> @@ -4750,11 +4750,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> if (type == LRU_GEN_FILE)
> sc->nr.file_taken += isolated;
> - /*
> - * There might not be eligible folios due to reclaim_idx. Check the
> - * remaining to prevent livelock if it's not making progress.
> - */
> - return isolated || !remaining ? scanned : 0;
> +
> + *isolatedp = isolated;
> + return scanned;
> }
>
> static int get_tier_idx(struct lruvec *lruvec, int type)
> @@ -4819,23 +4817,24 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> int *type_scanned, struct list_head *list)
> {
> int i;
> + int scanned = 0;
> + int isolated = 0;
> int type = get_type_to_scan(lruvec, swappiness);
>
> for_each_evictable_type(i, swappiness) {
> - int scanned;
> int tier = get_tier_idx(lruvec, type);
>
> *type_scanned = type;
I think this is problematic: now `isolate_folios` can scan a nonzero
amount of more than one type of memory. The caller (`evict_folios`) then
calls `trace_mm_vmscan_lru_shrink_inactive` with the total scanned amount,
but with only the last type we scanned (potentially misattributing part
of the scan). Not a "functional" issue, but it could mean confusing
data for anyone watching the tracepoint.
>
> - scanned = scan_folios(nr_to_scan, lruvec, sc,
> - type, tier, list);
> - if (scanned)
> + scanned += scan_folios(nr_to_scan, lruvec, sc,
> + type, tier, list, &isolated);
> + if (isolated)
> return scanned;
>
> type = !type;
> }
>
> - return 0;
> + return scanned;
> }
>
> static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> @@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> struct reclaim_stat stat;
> struct lru_gen_mm_walk *walk;
> bool skip_retry = false;
> - struct lru_gen_folio *lrugen = &lruvec->lrugen;
> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>
> @@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>
> scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
>
> - scanned += try_to_inc_min_seq(lruvec, swappiness);
> -
> - if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
> - scanned = 0;
> + try_to_inc_min_seq(lruvec, swappiness);
IIUC, this change is what introduces the issue patch 6 is trying to
resolve. Is it worth squashing patch 6 into this one, so we don't
have this non-ideal intermediate state?
>
> lruvec_unlock_irq(lruvec);
>
>
> --
> 2.53.0
>
>
On Sat, Mar 21, 2026 at 4:59 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
>
> On Tue, Mar 17, 2026 at 12:11 PM Kairui Song via B4 Relay
> <devnull+kasong.tencent.com@kernel.org> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > Make the scan helpers return the exact number of folios being scanned
> > or isolated. This should make the scan more accurate and easier to
> > follow.
> >
> > Now there is no more need for special handling when there is no
> > progress made. The old livelock prevention `(return isolated ||
> > !remaining ? scanned : 0)` is replaced by the natural scan budget
> > exhaustion in try_to_shrink_lruvec, and sort_folio moves ineligible
> > folios to newer generations.
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > ---
> > mm/vmscan.c | 27 +++++++++++----------------
> > 1 file changed, 11 insertions(+), 16 deletions(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index ed5b5f8dd3c7..4f4548ff3a17 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -4680,7 +4680,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
> >
> > static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> > struct scan_control *sc, int type, int tier,
> > - struct list_head *list)
> > + struct list_head *list, int *isolatedp)
> > {
> > int i;
> > int gen;
> > @@ -4750,11 +4750,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> > type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> > if (type == LRU_GEN_FILE)
> > sc->nr.file_taken += isolated;
> > - /*
> > - * There might not be eligible folios due to reclaim_idx. Check the
> > - * remaining to prevent livelock if it's not making progress.
> > - */
> > - return isolated || !remaining ? scanned : 0;
> > +
> > + *isolatedp = isolated;
> > + return scanned;
> > }
> >
> > static int get_tier_idx(struct lruvec *lruvec, int type)
> > @@ -4819,23 +4817,24 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> > int *type_scanned, struct list_head *list)
> > {
> > int i;
> > + int scanned = 0;
> > + int isolated = 0;
> > int type = get_type_to_scan(lruvec, swappiness);
> >
> > for_each_evictable_type(i, swappiness) {
> > - int scanned;
> > int tier = get_tier_idx(lruvec, type);
> >
> > *type_scanned = type;
>
> I think this is problematic, now `isolate_folios` can scan a nonzero
> amount of > 1 type of memory. Then the caller (`evict_folios`) calls
> `trace_mm_vmscan_lru_shrink_inactive` with the total scanned amount,
> with only the last type we scanned (misattributing part of the scan,
> potentially). Not a "functional" issue, but it could mean confusing
> data for anyone watching the tracepoint.
Thanks! Nice catch. I'll introduce another variable for the tracepoint,
then it should be fine.
>
>
> >
> > - scanned = scan_folios(nr_to_scan, lruvec, sc,
> > - type, tier, list);
> > - if (scanned)
> > + scanned += scan_folios(nr_to_scan, lruvec, sc,
> > + type, tier, list, &isolated);
> > + if (isolated)
> > return scanned;
> >
> > type = !type;
> > }
> >
> > - return 0;
> > + return scanned;
> > }
> >
> > static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> > @@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> > struct reclaim_stat stat;
> > struct lru_gen_mm_walk *walk;
> > bool skip_retry = false;
> > - struct lru_gen_folio *lrugen = &lruvec->lrugen;
> > struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> > struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> >
> > @@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> >
> > scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
> >
> > - scanned += try_to_inc_min_seq(lruvec, swappiness);
> > -
> > - if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
> > - scanned = 0;
> > + try_to_inc_min_seq(lruvec, swappiness);
>
> IIUC, this change is what introduces the issue patch 6 is trying to
> resolve. Is it worth squashing patch 6 in to this one, so we don't
> have this non-ideal intermediate state?
Well, it's not: patch 6 fixes an existing problem, see the cover
letter about the OOM issue.
This part of the change just cleans up the loop code. It looks really
strange to me that increasing min_seq is counted as scanning one
folio. Aborting the scan if there are only 2 gens kind of makes sense,
but this doesn't seem to be the right place. These strange parts to
avoid livelock can be dropped since we now have an exact count of folios
being scanned. I'll add more words in the commit message.
On 2026/3/23 0:20, Kairui Song wrote:
> On Sat, Mar 21, 2026 at 4:59 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
>>
>> On Tue, Mar 17, 2026 at 12:11 PM Kairui Song via B4 Relay
>> <devnull+kasong.tencent.com@kernel.org> wrote:
>>>
>>> From: Kairui Song <kasong@tencent.com>
>>>
>>> Make the scan helpers return the exact number of folios being scanned
>>> or isolated. This should make the scan more accurate and easier to
>>> follow.
>>>
>>> Now there is no more need for special handling when there is no
>>> progress made. The old livelock prevention `(return isolated ||
>>> !remaining ? scanned : 0)` is replaced by the natural scan budget
>>> exhaustion in try_to_shrink_lruvec, and sort_folio moves ineligible
>>> folios to newer generations.
>>>
>>> Signed-off-by: Kairui Song <kasong@tencent.com>
>>> ---
>>> mm/vmscan.c | 27 +++++++++++----------------
>>> 1 file changed, 11 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>>> index ed5b5f8dd3c7..4f4548ff3a17 100644
>>> --- a/mm/vmscan.c
>>> +++ b/mm/vmscan.c
>>> @@ -4680,7 +4680,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
>>>
>>> static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>> struct scan_control *sc, int type, int tier,
>>> - struct list_head *list)
>>> + struct list_head *list, int *isolatedp)
>>> {
>>> int i;
>>> int gen;
>>> @@ -4750,11 +4750,9 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>> type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
>>> if (type == LRU_GEN_FILE)
>>> sc->nr.file_taken += isolated;
>>> - /*
>>> - * There might not be eligible folios due to reclaim_idx. Check the
>>> - * remaining to prevent livelock if it's not making progress.
>>> - */
>>> - return isolated || !remaining ? scanned : 0;
>>> +
>>> + *isolatedp = isolated;
>>> + return scanned;
>>> }
>>>
>>> static int get_tier_idx(struct lruvec *lruvec, int type)
>>> @@ -4819,23 +4817,24 @@ static int isolate_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>> int *type_scanned, struct list_head *list)
>>> {
>>> int i;
>>> + int scanned = 0;
>>> + int isolated = 0;
>>> int type = get_type_to_scan(lruvec, swappiness);
>>>
>>> for_each_evictable_type(i, swappiness) {
>>> - int scanned;
>>> int tier = get_tier_idx(lruvec, type);
>>>
>>> *type_scanned = type;
>>
>> I think this is problematic, now `isolate_folios` can scan a nonzero
>> amount of > 1 type of memory. Then the caller (`evict_folios`) calls
>> `trace_mm_vmscan_lru_shrink_inactive` with the total scanned amount,
>> with only the last type we scanned (misattributing part of the scan,
>> potentially). Not a "functional" issue, but it could mean confusing
>> data for anyone watching the tracepoint.
>
> Thanks! Nice catch, I'll introduce another variable for the tracepoint
> then it should be fine.
>
>>
>>
>>>
>>> - scanned = scan_folios(nr_to_scan, lruvec, sc,
>>> - type, tier, list);
>>> - if (scanned)
>>> + scanned += scan_folios(nr_to_scan, lruvec, sc,
>>> + type, tier, list, &isolated);
>>> + if (isolated)
>>> return scanned;
>>>
>>> type = !type;
>>> }
>>>
>>> - return 0;
>>> + return scanned;
>>> }
>>>
>>> static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>> @@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>> struct reclaim_stat stat;
>>> struct lru_gen_mm_walk *walk;
>>> bool skip_retry = false;
>>> - struct lru_gen_folio *lrugen = &lruvec->lrugen;
>>> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>>> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>>>
>>> @@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>>
>>> scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
>>>
>>> - scanned += try_to_inc_min_seq(lruvec, swappiness);
>>> -
>>> - if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
>>> - scanned = 0;
>>> + try_to_inc_min_seq(lruvec, swappiness);
>>
>> IIUC, this change is what introduces the issue patch 6 is trying to
>> resolve. Is it worth squashing patch 6 in to this one, so we don't
>> have this non-ideal intermediate state?
>
> Well it's not, patch 6 is fixing an existing problem, see the cover
> letter about the OOM issue.
>
> This part of changing is just cleanup the loop code. It looks really
> strange to me that increasing min_seq is considered as scanning one
> folio. Aborting the scan if there is only 2 gen kind of make sense but
> this doesn't seems the right place. These strange parts to avoid
> livelock can be dropped since we have an exact count of folios being
> scanned now. I'll add more words in the commit message.
This change confused me too.
IIUC, this change looks conceptually tied to patch 3. The following change means
that evict_folios should not be invoked if aging is needed. So the check can be
dropped there, right?
```
static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
...
+ if (should_run_aging(lruvec, max_seq, sc, swappiness)) {
+ if (try_to_inc_max_seq(lruvec, max_seq, swappiness, false))
+ need_rotate = true;
+ break;
+ }
```
--
Best regards,
Ridong
On Tue, Mar 24, 2026 at 3:22 PM Chen Ridong <chenridong@huaweicloud.com> wrote:
> On 2026/3/23 0:20, Kairui Song wrote:
> > On Sat, Mar 21, 2026 at 4:59 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
> >>
> >> On Tue, Mar 17, 2026 at 12:11 PM Kairui Song via B4 Relay
> >> <devnull+kasong.tencent.com@kernel.org> wrote:
> >>>
> >>> From: Kairui Song <kasong@tencent.com>
> >>>
> >>> Make the scan helpers return the exact number of folios being scanned
> >>> or isolated. This should make the scan more accurate and easier to
> >>> follow.
> >>>
> >>> Now there is no more need for special handling when there is no
> >>> progress made. The old livelock prevention `(return isolated ||
> >>> !remaining ? scanned : 0)` is replaced by the natural scan budget
> >>> exhaustion in try_to_shrink_lruvec, and sort_folio moves ineligible
> >>> folios to newer generations.
> >>>
> >>> Signed-off-by: Kairui Song <kasong@tencent.com>
...
> >>> static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> >>> @@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> >>> struct reclaim_stat stat;
> >>> struct lru_gen_mm_walk *walk;
> >>> bool skip_retry = false;
> >>> - struct lru_gen_folio *lrugen = &lruvec->lrugen;
> >>> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> >>> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> >>>
> >>> @@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
> >>>
> >>> scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
> >>>
> >>> - scanned += try_to_inc_min_seq(lruvec, swappiness);
> >>> -
> >>> - if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
> >>> - scanned = 0;
> >>> + try_to_inc_min_seq(lruvec, swappiness);
> >>
> >> IIUC, this change is what introduces the issue patch 6 is trying to
> >> resolve. Is it worth squashing patch 6 in to this one, so we don't
> >> have this non-ideal intermediate state?
> >
> > Well it's not, patch 6 is fixing an existing problem, see the cover
> > letter about the OOM issue.
> >
> > This part of changing is just cleanup the loop code. It looks really
> > strange to me that increasing min_seq is considered as scanning one
> > folio. Aborting the scan if there is only 2 gen kind of make sense but
> > this doesn't seems the right place. These strange parts to avoid
> > livelock can be dropped since we have an exact count of folios being
> > scanned now. I'll add more words in the commit message.
>
> This change confused me too.
>
> IIUC, this change looks conceptually tied to patch 3. The following change means
> that evict_folios should not be invoked if aging is needed. So the judge can be
> dropped there, right?
>
>
> ```
> static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
> {
> ...
> + if (should_run_aging(lruvec, max_seq, sc, swappiness)) {
> + if (try_to_inc_max_seq(lruvec, max_seq, swappiness, false))
> + need_rotate = true;
> + break;
> + }
> ```
>
Hi Ridong,
Ahh yes, as you pointed out, the explicit should_run_aging check kind of
guards evict_folios. That's not everything, though: previously
isolate_folios might return 0 if no folio was isolated. But now it
always returns the number of folios scanned, unless there are
only two gens left and it hits the forced protection, which also means
the check here can be dropped.
But not invoking evict_folios when aging is needed is existing
behavior; that commit (patch 3) didn't change it, it just made it
cleaner so it is easier to see.
Now the folio scan number combines well with the scan budget
introduced in the previous commit.
And I just noticed it might be even better to move try_to_inc_min_seq
before isolate_folios, to avoid an empty gen blocking isolate_folios.
Usually this won't be an issue, since calling try_to_inc_min_seq after
isolate_folios also ensures reclaim won't generate any problematic
empty gen, but folios being removed by things like freeing could
introduce one or two empty gens.
The forced gen protection may cause other problems, but that's
unrelated to this commit and should be improved later.
On 2026/3/24 16:05, Kairui Song wrote:
> On Tue, Mar 24, 2026 at 3:22 PM Chen Ridong <chenridong@huaweicloud.com> wrote:
>> On 2026/3/23 0:20, Kairui Song wrote:
>>> On Sat, Mar 21, 2026 at 4:59 AM Axel Rasmussen <axelrasmussen@google.com> wrote:
>>>>
>>>> On Tue, Mar 17, 2026 at 12:11 PM Kairui Song via B4 Relay
>>>> <devnull+kasong.tencent.com@kernel.org> wrote:
>>>>>
>>>>> From: Kairui Song <kasong@tencent.com>
>>>>>
>>>>> Make the scan helpers return the exact number of folios being scanned
>>>>> or isolated. This should make the scan more accurate and easier to
>>>>> follow.
>>>>>
>>>>> Now there is no more need for special handling when there is no
>>>>> progress made. The old livelock prevention `(return isolated ||
>>>>> !remaining ? scanned : 0)` is replaced by the natural scan budget
>>>>> exhaustion in try_to_shrink_lruvec, and sort_folio moves ineligible
>>>>> folios to newer generations.
>>>>>
>>>>> Signed-off-by: Kairui Song <kasong@tencent.com>
>
> ...
>
>>>>> static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>>>> @@ -4852,7 +4851,6 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>>>> struct reclaim_stat stat;
>>>>> struct lru_gen_mm_walk *walk;
>>>>> bool skip_retry = false;
>>>>> - struct lru_gen_folio *lrugen = &lruvec->lrugen;
>>>>> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>>>>> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>>>>>
>>>>> @@ -4860,10 +4858,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>>>>>
>>>>> scanned = isolate_folios(nr_to_scan, lruvec, sc, swappiness, &type, &list);
>>>>>
>>>>> - scanned += try_to_inc_min_seq(lruvec, swappiness);
>>>>> -
>>>>> - if (evictable_min_seq(lrugen->min_seq, swappiness) + MIN_NR_GENS > lrugen->max_seq)
>>>>> - scanned = 0;
>>>>> + try_to_inc_min_seq(lruvec, swappiness);
>>>>
>>>> IIUC, this change is what introduces the issue patch 6 is trying to
>>>> resolve. Is it worth squashing patch 6 in to this one, so we don't
>>>> have this non-ideal intermediate state?
>>>
>>> Well it's not, patch 6 is fixing an existing problem, see the cover
>>> letter about the OOM issue.
>>>
>>> This part of changing is just cleanup the loop code. It looks really
>>> strange to me that increasing min_seq is considered as scanning one
>>> folio. Aborting the scan if there is only 2 gen kind of make sense but
>>> this doesn't seems the right place. These strange parts to avoid
>>> livelock can be dropped since we have an exact count of folios being
>>> scanned now. I'll add more words in the commit message.
>>
>> This change confused me too.
>>
>> IIUC, this change looks conceptually tied to patch 3. The following change means
>> that evict_folios should not be invoked if aging is needed. So the judge can be
>> dropped there, right?
>>
>>
>> ```
>> static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
>> {
>> ...
>> + if (should_run_aging(lruvec, max_seq, sc, swappiness)) {
>> + if (try_to_inc_max_seq(lruvec, max_seq, swappiness, false))
>> + need_rotate = true;
>> + break;
>> + }
>> ```
>>
>
> Hi Ridong,
>
> Ahh yes, as you pointed out, the explicit should_run_aging kind of
> guards the evict_folio. That's not everything, besides, previously
> isolate_folios may return 0 if there is no folio isolated. But now it
> always return the number of folios being scanned, unless there are
> only two genes left and hit the force protection, which also makes the
> judge here can be dropped.
>
> But not invoking evict_folios if aging is needed is an existing
> behavior, that commit (patch 3) didn't change it, just made it cleaner
> so we can see it well.
>
Thanks for the explanation.
Would it be better to combine this change with patch 3, rather than adding to
the commit message?
--
Best regards,
Ridong
On Tue, Mar 24, 2026 at 5:10 PM Chen Ridong <chenridong@huaweicloud.com> wrote:
> On 2026/3/24 16:05, Kairui Song wrote:
> > Hi Ridong,
> >
> > Ahh yes, as you pointed out, the explicit should_run_aging kind of
> > guards the evict_folio. That's not everything, besides, previously
> > isolate_folios may return 0 if there is no folio isolated. But now it
> > always return the number of folios being scanned, unless there are
> > only two genes left and hit the force protection, which also makes the
> > judge here can be dropped.
> >
> > But not invoking evict_folios if aging is needed is an existing
> > behavior, that commit (patch 3) didn't change it, just made it cleaner
> > so we can see it well.
> >
>
> Thanks for the explanation.
>
> Would it be better to combine this change with patch 3, rather than adding to
> the commit message?
>
> --
> Best regards,
> Ridong
>
Hi Ridong, thanks for the suggestion. Patch 3 is already a bit complex I
think, so I split this out as a separate patch so the review might be
easier. Maybe I can try to merge them later if they still look confusing
in V2.
© 2016 - 2026 Red Hat, Inc.