In alloc_demote_folio(), mtc->nmask is set to NULL for the first
allocation. If that succeeds, it returns without restoring mtc->nmask
to allowed_mask. For subsequent allocations from the migrate_pages()
batch, mtc->nmask will be NULL. If the target node then becomes full,
the fallback allocation will use nmask = NULL, allocating from any
node allowed by the task cpuset, which for kswapd is all nodes.
To address this issue, restore the mtc->nmask to its original allowed
nodemask after the first allocation.
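For illustration, a condensed trace of the leak across a two-folio
batch (a sketch, not the literal code; migrate_pages() invokes this
callback once per folio with the same shared mtc):

	/* Folio 1: the first attempt succeeds on the target node. */
	allowed_mask = mtc->nmask;	/* saves the demotion nodemask */
	mtc->nmask = NULL;
	dst = alloc_migration_target(src, (unsigned long)mtc);
	return dst;			/* early return: mtc->nmask stays NULL */

	/* Folio 2: the demotion nodemask is already lost. */
	allowed_mask = mtc->nmask;	/* reads NULL, not the demotion mask */
	...
	mtc->nmask = allowed_mask;	/* fallback runs with nmask == NULL */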
Signed-off-by: Bing Jiao <bingjiao@google.com>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cbffc0a27824..b42abd17aee7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -985,11 +985,11 @@ static struct folio *alloc_demote_folio(struct folio *src,
 	mtc->nmask = NULL;
 	mtc->gfp_mask |= __GFP_THISNODE;
 	dst = alloc_migration_target(src, (unsigned long)mtc);
+	mtc->nmask = allowed_mask;
 	if (dst)
 		return dst;
 
 	mtc->gfp_mask &= ~__GFP_THISNODE;
-	mtc->nmask = allowed_mask;
 
 	return alloc_migration_target(src, (unsigned long)mtc);
 }
--
2.53.0.473.g4a7958ca14-goog
On 3/2/26 08:03, Bing Jiao wrote:
> In alloc_demote_folio(), mtc->nmask is set to NULL for the first
> allocation. If that succeeds, it returns without restoring mtc->nmask
> to allowed_mask. For subsequent allocations from the migrate_pages()
> batch, mtc->nmask will be NULL. If the target node then becomes full,
> the fallback allocation will use nmask = NULL, allocating from any
> node allowed by the task cpuset, which for kswapd is all nodes.
>
> To address this issue, restore the mtc->nmask to its original allowed
> nodemask after the first allocation.
>
That would be
Fixes: 320080272892 ("mm/demotion: demote pages according to allocation fallback order")
?
> Signed-off-by: Bing Jiao <bingjiao@google.com>
> ---
> mm/vmscan.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index cbffc0a27824..b42abd17aee7 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -985,11 +985,11 @@ static struct folio *alloc_demote_folio(struct folio *src,
>  	mtc->nmask = NULL;
>  	mtc->gfp_mask |= __GFP_THISNODE;
>  	dst = alloc_migration_target(src, (unsigned long)mtc);
> +	mtc->nmask = allowed_mask;
>  	if (dst)
>  		return dst;
>  
>  	mtc->gfp_mask &= ~__GFP_THISNODE;
> -	mtc->nmask = allowed_mask;
>  
>  	return alloc_migration_target(src, (unsigned long)mtc);
>  }
> --
> 2.53.0.473.g4a7958ca14-goog
>
Maybe we should just not touch the original mtc?
diff --git a/mm/vmscan.c b/mm/vmscan.c
index de62225b381a..f07716e5389e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -985,9 +985,9 @@ static void folio_check_dirty_writeback(struct folio *folio,
 static struct folio *alloc_demote_folio(struct folio *src,
 		unsigned long private)
 {
+	struct migration_target_control *mtc, target_nid_mtc;
 	struct folio *dst;
 	nodemask_t *allowed_mask;
-	struct migration_target_control *mtc;
 
 	mtc = (struct migration_target_control *)private;
 
@@ -1001,15 +1001,12 @@ static struct folio *alloc_demote_folio(struct folio *src,
 	 * a demotion of cold pages from the target memtier. This can result
 	 * in the kernel placing hot pages in slower(lower) memory tiers.
 	 */
-	mtc->nmask = NULL;
-	mtc->gfp_mask |= __GFP_THISNODE;
-	dst = alloc_migration_target(src, (unsigned long)mtc);
+	target_nid_mtc = *mtc;
+	target_nid_mtc.nmask = NULL;
+	target_nid_mtc.gfp_mask |= __GFP_THISNODE;
+	dst = alloc_migration_target(src, (unsigned long)&target_nid_mtc);
 	if (dst)
 		return dst;
-
-	mtc->gfp_mask &= ~__GFP_THISNODE;
-	mtc->nmask = allowed_mask;
-
 	return alloc_migration_target(src, (unsigned long)mtc);
 }
--
Cheers,
David
On Mon, Mar 02, 2026 at 09:00:07AM +0100, David Hildenbrand (Arm) wrote:
> On 3/2/26 08:03, Bing Jiao wrote:
> > In alloc_demote_folio(), mtc->nmask is set to NULL for the first
> > allocation. If that succeeds, it returns without restoring mtc->nmask
> > to allowed_mask. For subsequent allocations from the migrate_pages()
> > batch, mtc->nmask will be NULL. If the target node then becomes full,
> > the fallback allocation will use nmask = NULL, allocating from any
> > node allowed by the task cpuset, which for kswapd is all nodes.
> >
> > To address this issue, restore the mtc->nmask to its original allowed
> > nodemask after the first allocation.
> >
>
> That would be
>
> Fixes: 320080272892 ("mm/demotion: demote pages according to allocation fallback order")
>
> ?
Thanks for pointing it out. Will add it in the new patch.
> > Signed-off-by: Bing Jiao <bingjiao@google.com>
> > ---
> > mm/vmscan.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index cbffc0a27824..b42abd17aee7 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -985,11 +985,11 @@ static struct folio *alloc_demote_folio(struct folio *src,
> >  	mtc->nmask = NULL;
> >  	mtc->gfp_mask |= __GFP_THISNODE;
> >  	dst = alloc_migration_target(src, (unsigned long)mtc);
> > +	mtc->nmask = allowed_mask;
> >  	if (dst)
> >  		return dst;
> >  
> >  	mtc->gfp_mask &= ~__GFP_THISNODE;
> > -	mtc->nmask = allowed_mask;
> >  
> >  	return alloc_migration_target(src, (unsigned long)mtc);
> >  }
> > --
> > 2.53.0.473.g4a7958ca14-goog
> >
>
> Maybe we should just not touch the original mtc?
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index de62225b381a..f07716e5389e 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -985,9 +985,9 @@ static void folio_check_dirty_writeback(struct folio *folio,
>  static struct folio *alloc_demote_folio(struct folio *src,
>  		unsigned long private)
>  {
> +	struct migration_target_control *mtc, target_nid_mtc;
>  	struct folio *dst;
>  	nodemask_t *allowed_mask;
> -	struct migration_target_control *mtc;
>  
>  	mtc = (struct migration_target_control *)private;
>  
> @@ -1001,15 +1001,12 @@ static struct folio *alloc_demote_folio(struct folio *src,
>  	 * a demotion of cold pages from the target memtier. This can result
>  	 * in the kernel placing hot pages in slower(lower) memory tiers.
>  	 */
> -	mtc->nmask = NULL;
> -	mtc->gfp_mask |= __GFP_THISNODE;
> -	dst = alloc_migration_target(src, (unsigned long)mtc);
> +	target_nid_mtc = *mtc;
> +	target_nid_mtc.nmask = NULL;
> +	target_nid_mtc.gfp_mask |= __GFP_THISNODE;
> +	dst = alloc_migration_target(src, (unsigned long)&target_nid_mtc);
>  	if (dst)
>  		return dst;
> -
> -	mtc->gfp_mask &= ~__GFP_THISNODE;
> -	mtc->nmask = allowed_mask;
> -
>  	return alloc_migration_target(src, (unsigned long)mtc);
>  }
>
>
>
> --
> Cheers,
>
> David
Thank you for the suggestion, David.
I agree that not touching the original mtc is a better approach. It makes
the distinction between the two allocation attempts much clearer
and avoids the side-effect bug. Will update it then.
Best,
Bing
In alloc_demote_folio(), mtc->nmask is set to NULL for the first
allocation. If that succeeds, it returns without restoring mtc->nmask
to allowed_mask. For subsequent allocations from the migrate_pages()
batch, mtc->nmask will be NULL. If the target node then becomes full,
the fallback allocation will use nmask = NULL, allocating from any
node allowed by the task cpuset, which for kswapd is all nodes.
To address this issue, use a local copy of the mtc structure with
nmask = NULL for the first allocation attempt specifically, ensuring
the original mtc remains unmodified.
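For reference, the function after this change reads roughly as follows
(a sketch reconstructed from the diff below; the comment block above
the first attempt is elided):

	static struct folio *alloc_demote_folio(struct folio *src,
						unsigned long private)
	{
		struct migration_target_control *mtc, target_nid_mtc;
		struct folio *dst;

		mtc = (struct migration_target_control *)private;

		/* First attempt: target node only, via an on-stack copy,
		 * so the caller's mtc is never written to. */
		target_nid_mtc = *mtc;
		target_nid_mtc.nmask = NULL;
		target_nid_mtc.gfp_mask |= __GFP_THISNODE;
		dst = alloc_migration_target(src, (unsigned long)&target_nid_mtc);
		if (dst)
			return dst;

		/* Fallback: the caller's mtc, still carrying the allowed
		 * nodemask and without __GFP_THISNODE. */
		return alloc_migration_target(src, (unsigned long)mtc);
	}

Because target_nid_mtc lives on this function's stack, every
invocation of the callback starts from the caller's unmodified mtc.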
Fixes: 320080272892 ("mm/demotion: demote pages according to allocation fallback order")
Signed-off-by: Bing Jiao <bingjiao@google.com>
---
mm/vmscan.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cbffc0a27824..c4e0ce737e03 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -966,13 +966,11 @@ static void folio_check_dirty_writeback(struct folio *folio,
 static struct folio *alloc_demote_folio(struct folio *src,
 		unsigned long private)
 {
+	struct migration_target_control *mtc, target_nid_mtc;
 	struct folio *dst;
-	nodemask_t *allowed_mask;
-	struct migration_target_control *mtc;
 
 	mtc = (struct migration_target_control *)private;
 
-	allowed_mask = mtc->nmask;
 	/*
 	 * make sure we allocate from the target node first also trying to
 	 * demote or reclaim pages from the target node via kswapd if we are
@@ -982,15 +980,13 @@ static struct folio *alloc_demote_folio(struct folio *src,
 	 * a demotion of cold pages from the target memtier. This can result
 	 * in the kernel placing hot pages in slower(lower) memory tiers.
 	 */
-	mtc->nmask = NULL;
-	mtc->gfp_mask |= __GFP_THISNODE;
-	dst = alloc_migration_target(src, (unsigned long)mtc);
+	target_nid_mtc = *mtc;
+	target_nid_mtc.nmask = NULL;
+	target_nid_mtc.gfp_mask |= __GFP_THISNODE;
+	dst = alloc_migration_target(src, (unsigned long)&target_nid_mtc);
 	if (dst)
 		return dst;
 
-	mtc->gfp_mask &= ~__GFP_THISNODE;
-	mtc->nmask = allowed_mask;
-
 	return alloc_migration_target(src, (unsigned long)mtc);
 }
--
2.53.0.473.g4a7958ca14-goog
On Tue, Mar 03, 2026 at 05:25:17AM +0000, Bing Jiao wrote:
> In alloc_demote_folio(), mtc->nmask is set to NULL for the first
> allocation. If that succeeds, it returns without restoring mtc->nmask
> to allowed_mask. For subsequent allocations from the migrate_pages()
> batch, mtc->nmask will be NULL. If the target node then becomes full,
> the fallback allocation will use nmask = NULL, allocating from any
> node allowed by the task cpuset, which for kswapd is all nodes.
>
> To address this issue, use a local copy of the mtc structure with
> nmask = NULL for the first allocation attempt specifically, ensuring
> the original mtc remains unmodified.
>
> Fixes: 320080272892 ("mm/demotion: demote pages according to allocation fallback order")
> Signed-off-by: Bing Jiao <bingjiao@google.com>
LGTM, so:
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
> mm/vmscan.c | 14 +++++---------
> 1 file changed, 5 insertions(+), 9 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index cbffc0a27824..c4e0ce737e03 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -966,13 +966,11 @@ static void folio_check_dirty_writeback(struct folio *folio,
>  static struct folio *alloc_demote_folio(struct folio *src,
>  		unsigned long private)
>  {
> +	struct migration_target_control *mtc, target_nid_mtc;
>  	struct folio *dst;
> -	nodemask_t *allowed_mask;
> -	struct migration_target_control *mtc;
>  
>  	mtc = (struct migration_target_control *)private;
>  
> -	allowed_mask = mtc->nmask;
>  	/*
>  	 * make sure we allocate from the target node first also trying to
>  	 * demote or reclaim pages from the target node via kswapd if we are
> @@ -982,15 +980,13 @@ static struct folio *alloc_demote_folio(struct folio *src,
>  	 * a demotion of cold pages from the target memtier. This can result
>  	 * in the kernel placing hot pages in slower(lower) memory tiers.
>  	 */
> -	mtc->nmask = NULL;
> -	mtc->gfp_mask |= __GFP_THISNODE;
> -	dst = alloc_migration_target(src, (unsigned long)mtc);
> +	target_nid_mtc = *mtc;
> +	target_nid_mtc.nmask = NULL;
> +	target_nid_mtc.gfp_mask |= __GFP_THISNODE;
> +	dst = alloc_migration_target(src, (unsigned long)&target_nid_mtc);
>  	if (dst)
>  		return dst;
>  
> -	mtc->gfp_mask &= ~__GFP_THISNODE;
> -	mtc->nmask = allowed_mask;
> -
>  	return alloc_migration_target(src, (unsigned long)mtc);
>  }
>
> --
> 2.53.0.473.g4a7958ca14-goog
>
On 3/3/26 06:25, Bing Jiao wrote:
> In alloc_demote_folio(), mtc->nmask is set to NULL for the first
> allocation. If that succeeds, it returns without restoring mtc->nmask
> to allowed_mask. For subsequent allocations from the migrate_pages()
> batch, mtc->nmask will be NULL. If the target node then becomes full,
> the fallback allocation will use nmask = NULL, allocating from any
> node allowed by the task cpuset, which for kswapd is all nodes.
>
> To address this issue, use a local copy of the mtc structure with
> nmask = NULL for the first allocation attempt specifically, ensuring
> the original mtc remains unmodified.
>
> Fixes: 320080272892 ("mm/demotion: demote pages according to allocation fallback order")
> Signed-off-by: Bing Jiao <bingjiao@google.com>
> ---
> mm/vmscan.c | 14 +++++---------
> 1 file changed, 5 insertions(+), 9 deletions(-)
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
--
Cheers,
David