[PATCH 1/4] mm: Move demotion related functions in memory-tiers.c

Alexandre Ghiti posted 4 patches 3 weeks, 6 days ago
[PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Alexandre Ghiti 3 weeks, 6 days ago
Let's have all the demotion functions in this file, no functional
change intended.

Suggested-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
---
 include/linux/memory-tiers.h | 18 ++++++++
 mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
 mm/vmscan.c                  | 80 +-----------------------------------
 3 files changed, 94 insertions(+), 79 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 96987d9d95a8..0bf0d002939e 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
 int next_demotion_node(int node, const nodemask_t *allowed_mask);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+			      struct pglist_data *pgdat,
+			      struct mem_cgroup *memcg);
 #else
 static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
 {
@@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
 {
 	return true;
 }
+
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+					    struct pglist_data *pgdat,
+					    struct mem_cgroup *memcg)
+{
+	return 0;
+}
+
 #endif
 
 #else
@@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
 	return true;
 }
 
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+					    struct pglist_data *pgdat,
+					    struct mem_cgroup *memcg)
+{
+	return 0;
+}
+
 static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
 {
 	return 0;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 986f809376eb..afdf21738a54 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -7,6 +7,7 @@
 #include <linux/memory-tiers.h>
 #include <linux/notifier.h>
 #include <linux/sched/sysctl.h>
+#include <linux/migrate.h>
 
 #include "internal.h"
 
@@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
 	return find_next_best_node(node, &mask);
 }
 
+static struct folio *alloc_demote_folio(struct folio *src,
+					unsigned long private)
+{
+	struct folio *dst;
+	nodemask_t *allowed_mask;
+	struct migration_target_control *mtc;
+
+	mtc = (struct migration_target_control *)private;
+
+	allowed_mask = mtc->nmask;
+	/*
+	 * make sure we allocate from the target node first also trying to
+	 * demote or reclaim pages from the target node via kswapd if we are
+	 * low on free memory on target node. If we don't do this and if
+	 * we have free memory on the slower(lower) memtier, we would start
+	 * allocating pages from slower(lower) memory tiers without even forcing
+	 * a demotion of cold pages from the target memtier. This can result
+	 * in the kernel placing hot pages in slower(lower) memory tiers.
+	 */
+	mtc->nmask = NULL;
+	mtc->gfp_mask |= __GFP_THISNODE;
+	dst = alloc_migration_target(src, (unsigned long)mtc);
+	if (dst)
+		return dst;
+
+	mtc->gfp_mask &= ~__GFP_THISNODE;
+	mtc->nmask = allowed_mask;
+
+	return alloc_migration_target(src, (unsigned long)mtc);
+}
+
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+			      struct pglist_data *pgdat,
+			      struct mem_cgroup *memcg)
+{
+	int target_nid;
+	unsigned int nr_succeeded;
+	nodemask_t allowed_mask;
+
+	struct migration_target_control mtc = {
+		/*
+		 * Allocate from 'node', or fail quickly and quietly.
+		 * When this happens, 'page' will likely just be discarded
+		 * instead of migrated.
+		 */
+		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+			__GFP_NOMEMALLOC | GFP_NOWAIT,
+		.nmask = &allowed_mask,
+		.reason = MR_DEMOTION,
+	};
+
+	if (list_empty(demote_folios))
+		return 0;
+
+	node_get_allowed_targets(pgdat, &allowed_mask);
+	mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
+	if (nodes_empty(allowed_mask))
+		return 0;
+
+	target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
+	if (target_nid == NUMA_NO_NODE)
+		/* No lower-tier nodes or nodes were hot-unplugged. */
+		return 0;
+
+	mtc.nid = target_nid;
+
+	/* Demotion ignores all cpuset and mempolicy settings */
+	migrate_pages(demote_folios, alloc_demote_folio, NULL,
+			(unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
+			&nr_succeeded);
+
+	return nr_succeeded;
+}
+
 static void disable_all_demotion_targets(void)
 {
 	struct memory_tier *memtier;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fc9373e8251..5e0138b94480 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
 		mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
 }
 
-static struct folio *alloc_demote_folio(struct folio *src,
-		unsigned long private)
-{
-	struct folio *dst;
-	nodemask_t *allowed_mask;
-	struct migration_target_control *mtc;
-
-	mtc = (struct migration_target_control *)private;
-
-	allowed_mask = mtc->nmask;
-	/*
-	 * make sure we allocate from the target node first also trying to
-	 * demote or reclaim pages from the target node via kswapd if we are
-	 * low on free memory on target node. If we don't do this and if
-	 * we have free memory on the slower(lower) memtier, we would start
-	 * allocating pages from slower(lower) memory tiers without even forcing
-	 * a demotion of cold pages from the target memtier. This can result
-	 * in the kernel placing hot pages in slower(lower) memory tiers.
-	 */
-	mtc->nmask = NULL;
-	mtc->gfp_mask |= __GFP_THISNODE;
-	dst = alloc_migration_target(src, (unsigned long)mtc);
-	if (dst)
-		return dst;
-
-	mtc->gfp_mask &= ~__GFP_THISNODE;
-	mtc->nmask = allowed_mask;
-
-	return alloc_migration_target(src, (unsigned long)mtc);
-}
-
-/*
- * Take folios on @demote_folios and attempt to demote them to another node.
- * Folios which are not demoted are left on @demote_folios.
- */
-static unsigned int demote_folio_list(struct list_head *demote_folios,
-				      struct pglist_data *pgdat,
-				      struct mem_cgroup *memcg)
-{
-	int target_nid;
-	unsigned int nr_succeeded;
-	nodemask_t allowed_mask;
-
-	struct migration_target_control mtc = {
-		/*
-		 * Allocate from 'node', or fail quickly and quietly.
-		 * When this happens, 'page' will likely just be discarded
-		 * instead of migrated.
-		 */
-		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
-			__GFP_NOMEMALLOC | GFP_NOWAIT,
-		.nmask = &allowed_mask,
-		.reason = MR_DEMOTION,
-	};
-
-	if (list_empty(demote_folios))
-		return 0;
-
-	node_get_allowed_targets(pgdat, &allowed_mask);
-	mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
-	if (nodes_empty(allowed_mask))
-		return 0;
-
-	target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
-	if (target_nid == NUMA_NO_NODE)
-		/* No lower-tier nodes or nodes were hot-unplugged. */
-		return 0;
-
-	mtc.nid = target_nid;
-
-	/* Demotion ignores all cpuset and mempolicy settings */
-	migrate_pages(demote_folios, alloc_demote_folio, NULL,
-		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
-		      &nr_succeeded);
-
-	return nr_succeeded;
-}
-
 static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
 {
 	if (gfp_mask & __GFP_FS)
@@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 	/* 'folio_list' is always empty here */
 
 	/* Migrate folios selected for demotion */
-	nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
+	nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
 	nr_reclaimed += nr_demoted;
 	stat->nr_demoted += nr_demoted;
 	/* Folios that could not be demoted are still in @demote_folios */
-- 
2.53.0
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by David Hildenbrand (Arm) 3 weeks, 5 days ago
On 3/11/26 12:02, Alexandre Ghiti wrote:
> Let's have all the demotion functions in this file, no functional
> change intended.
> 
> Suggested-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> ---
>  include/linux/memory-tiers.h | 18 ++++++++
>  mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
>  mm/vmscan.c                  | 80 +-----------------------------------
>  3 files changed, 94 insertions(+), 79 deletions(-)
> 
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 96987d9d95a8..0bf0d002939e 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
>  int next_demotion_node(int node, const nodemask_t *allowed_mask);
>  void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>  bool node_is_toptier(int node);
> +unsigned int mt_demote_folios(struct list_head *demote_folios,
> +			      struct pglist_data *pgdat,
> +			      struct mem_cgroup *memcg);
>  #else
>  static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
>  {
> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>  {
>  	return true;
>  }
> +
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> +					    struct pglist_data *pgdat,
> +					    struct mem_cgroup *memcg)

use two-tab indentation on second parameter line please. So this fits
into a single line. Same for the other functions.

Just like alloc_demote_folio() that you are moving already did.

[...]

> -static struct folio *alloc_demote_folio(struct folio *src,
> -		unsigned long private)
> -{
> -	struct folio *dst;
> -	nodemask_t *allowed_mask;
> -	struct migration_target_control *mtc;
> -
> -	mtc = (struct migration_target_control *)private;
> -
> -	allowed_mask = mtc->nmask;
> -	/*
> -	 * make sure we allocate from the target node first also trying to
> -	 * demote or reclaim pages from the target node via kswapd if we are
> -	 * low on free memory on target node. If we don't do this and if
> -	 * we have free memory on the slower(lower) memtier, we would start
> -	 * allocating pages from slower(lower) memory tiers without even forcing
> -	 * a demotion of cold pages from the target memtier. This can result
> -	 * in the kernel placing hot pages in slower(lower) memory tiers.
> -	 */
> -	mtc->nmask = NULL;
> -	mtc->gfp_mask |= __GFP_THISNODE;
> -	dst = alloc_migration_target(src, (unsigned long)mtc);
> -	if (dst)
> -		return dst;
> -
> -	mtc->gfp_mask &= ~__GFP_THISNODE;
> -	mtc->nmask = allowed_mask;
> -

I think this function changed in the meantime in mm/mm-unstable. Against
which branch is this patch?

-- 
Cheers,

David
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Alexandre Ghiti 3 weeks, 4 days ago
Hi David,

On 3/12/26 13:56, David Hildenbrand (Arm) wrote:
> On 3/11/26 12:02, Alexandre Ghiti wrote:
>> Let's have all the demotion functions in this file, no functional
>> change intended.
>>
>> Suggested-by: Gregory Price <gourry@gourry.net>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>>   include/linux/memory-tiers.h | 18 ++++++++
>>   mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
>>   mm/vmscan.c                  | 80 +-----------------------------------
>>   3 files changed, 94 insertions(+), 79 deletions(-)
>>
>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
>> index 96987d9d95a8..0bf0d002939e 100644
>> --- a/include/linux/memory-tiers.h
>> +++ b/include/linux/memory-tiers.h
>> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
>>   int next_demotion_node(int node, const nodemask_t *allowed_mask);
>>   void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>>   bool node_is_toptier(int node);
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>> +			      struct pglist_data *pgdat,
>> +			      struct mem_cgroup *memcg);
>>   #else
>>   static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
>>   {
>> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>>   {
>>   	return true;
>>   }
>> +
>> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
>> +					    struct pglist_data *pgdat,
>> +					    struct mem_cgroup *memcg)
> use two-tab indentation on second parameter line please. So this fits
> into a single line. Same for the other functions.
>
> Just like alloc_demote_folio() that you are moving already did.


Will do.


>
> [...]
>
>> -static struct folio *alloc_demote_folio(struct folio *src,
>> -		unsigned long private)
>> -{
>> -	struct folio *dst;
>> -	nodemask_t *allowed_mask;
>> -	struct migration_target_control *mtc;
>> -
>> -	mtc = (struct migration_target_control *)private;
>> -
>> -	allowed_mask = mtc->nmask;
>> -	/*
>> -	 * make sure we allocate from the target node first also trying to
>> -	 * demote or reclaim pages from the target node via kswapd if we are
>> -	 * low on free memory on target node. If we don't do this and if
>> -	 * we have free memory on the slower(lower) memtier, we would start
>> -	 * allocating pages from slower(lower) memory tiers without even forcing
>> -	 * a demotion of cold pages from the target memtier. This can result
>> -	 * in the kernel placing hot pages in slower(lower) memory tiers.
>> -	 */
>> -	mtc->nmask = NULL;
>> -	mtc->gfp_mask |= __GFP_THISNODE;
>> -	dst = alloc_migration_target(src, (unsigned long)mtc);
>> -	if (dst)
>> -		return dst;
>> -
>> -	mtc->gfp_mask &= ~__GFP_THISNODE;
>> -	mtc->nmask = allowed_mask;
>> -
> I think this function changed in the meantime in mm/mm-unstable. Against
> which branch is this patch?


Against Linus v7.0-rc3. I have just checked and you're right, I missed 
this modification, I'll rebase against mm-unstable.

Thanks,

Alex

>
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Donet Tom 3 weeks, 5 days ago
Hi Alexandre

On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
> Let's have all the demotion functions in this file, no functional
> change intended.
>
> Suggested-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> ---
>   include/linux/memory-tiers.h | 18 ++++++++
>   mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
>   mm/vmscan.c                  | 80 +-----------------------------------
>   3 files changed, 94 insertions(+), 79 deletions(-)
>
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 96987d9d95a8..0bf0d002939e 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
>   int next_demotion_node(int node, const nodemask_t *allowed_mask);
>   void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>   bool node_is_toptier(int node);
> +unsigned int mt_demote_folios(struct list_head *demote_folios,
> +			      struct pglist_data *pgdat,
> +			      struct mem_cgroup *memcg);
>   #else
>   static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
>   {
> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>   {
>   	return true;
>   }
> +
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> +					    struct pglist_data *pgdat,
> +					    struct mem_cgroup *memcg)
> +{
> +	return 0;
> +}
> +
>   #endif
>   
>   #else
> @@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
>   	return true;
>   }
>   
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> +					    struct pglist_data *pgdat,
> +					    struct mem_cgroup *memcg)
> +{
> +	return 0;
> +}
> +
>   static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
>   {
>   	return 0;
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index 986f809376eb..afdf21738a54 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -7,6 +7,7 @@
>   #include <linux/memory-tiers.h>
>   #include <linux/notifier.h>
>   #include <linux/sched/sysctl.h>
> +#include <linux/migrate.h>
>   
>   #include "internal.h"
>   
> @@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
>   	return find_next_best_node(node, &mask);
>   }
>   
> +static struct folio *alloc_demote_folio(struct folio *src,
> +					unsigned long private)
> +{
> +	struct folio *dst;
> +	nodemask_t *allowed_mask;
> +	struct migration_target_control *mtc;
> +
> +	mtc = (struct migration_target_control *)private;
> +
> +	allowed_mask = mtc->nmask;
> +	/*
> +	 * make sure we allocate from the target node first also trying to
> +	 * demote or reclaim pages from the target node via kswapd if we are
> +	 * low on free memory on target node. If we don't do this and if
> +	 * we have free memory on the slower(lower) memtier, we would start
> +	 * allocating pages from slower(lower) memory tiers without even forcing
> +	 * a demotion of cold pages from the target memtier. This can result
> +	 * in the kernel placing hot pages in slower(lower) memory tiers.
> +	 */
> +	mtc->nmask = NULL;
> +	mtc->gfp_mask |= __GFP_THISNODE;
> +	dst = alloc_migration_target(src, (unsigned long)mtc);
> +	if (dst)
> +		return dst;
> +
> +	mtc->gfp_mask &= ~__GFP_THISNODE;
> +	mtc->nmask = allowed_mask;
> +
> +	return alloc_migration_target(src, (unsigned long)mtc);
> +}
> +
> +unsigned int mt_demote_folios(struct list_head *demote_folios,


Demotion will happen only when different memory tiers are present, 
right? Since demote_folios() already implies that the folios are being 
demoted to a lower tier, is the mt_ prefix needed in the function name? 
I’m fine with keeping it as is, but I just wanted to clarify.

Otherwise it LGTM

Reviewed-by: Donet Tom <donettom@linux.ibm.com>

> +			      struct pglist_data *pgdat,
> +			      struct mem_cgroup *memcg)
> +{
> +	int target_nid;
> +	unsigned int nr_succeeded;
> +	nodemask_t allowed_mask;
> +
> +	struct migration_target_control mtc = {
> +		/*
> +		 * Allocate from 'node', or fail quickly and quietly.
> +		 * When this happens, 'page' will likely just be discarded
> +		 * instead of migrated.
> +		 */
> +		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
> +			__GFP_NOMEMALLOC | GFP_NOWAIT,
> +		.nmask = &allowed_mask,
> +		.reason = MR_DEMOTION,
> +	};
> +
> +	if (list_empty(demote_folios))
> +		return 0;
> +
> +	node_get_allowed_targets(pgdat, &allowed_mask);
> +	mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
> +	if (nodes_empty(allowed_mask))
> +		return 0;
> +
> +	target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
> +	if (target_nid == NUMA_NO_NODE)
> +		/* No lower-tier nodes or nodes were hot-unplugged. */
> +		return 0;
> +
> +	mtc.nid = target_nid;
> +
> +	/* Demotion ignores all cpuset and mempolicy settings */
> +	migrate_pages(demote_folios, alloc_demote_folio, NULL,
> +			(unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
> +			&nr_succeeded);
> +
> +	return nr_succeeded;
> +}
> +
>   static void disable_all_demotion_targets(void)
>   {
>   	struct memory_tier *memtier;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 0fc9373e8251..5e0138b94480 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
>   		mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
>   }
>   
> -static struct folio *alloc_demote_folio(struct folio *src,
> -		unsigned long private)
> -{
> -	struct folio *dst;
> -	nodemask_t *allowed_mask;
> -	struct migration_target_control *mtc;
> -
> -	mtc = (struct migration_target_control *)private;
> -
> -	allowed_mask = mtc->nmask;
> -	/*
> -	 * make sure we allocate from the target node first also trying to
> -	 * demote or reclaim pages from the target node via kswapd if we are
> -	 * low on free memory on target node. If we don't do this and if
> -	 * we have free memory on the slower(lower) memtier, we would start
> -	 * allocating pages from slower(lower) memory tiers without even forcing
> -	 * a demotion of cold pages from the target memtier. This can result
> -	 * in the kernel placing hot pages in slower(lower) memory tiers.
> -	 */
> -	mtc->nmask = NULL;
> -	mtc->gfp_mask |= __GFP_THISNODE;
> -	dst = alloc_migration_target(src, (unsigned long)mtc);
> -	if (dst)
> -		return dst;
> -
> -	mtc->gfp_mask &= ~__GFP_THISNODE;
> -	mtc->nmask = allowed_mask;
> -
> -	return alloc_migration_target(src, (unsigned long)mtc);
> -}
> -
> -/*
> - * Take folios on @demote_folios and attempt to demote them to another node.
> - * Folios which are not demoted are left on @demote_folios.
> - */
> -static unsigned int demote_folio_list(struct list_head *demote_folios,
> -				      struct pglist_data *pgdat,
> -				      struct mem_cgroup *memcg)
> -{
> -	int target_nid;
> -	unsigned int nr_succeeded;
> -	nodemask_t allowed_mask;
> -
> -	struct migration_target_control mtc = {
> -		/*
> -		 * Allocate from 'node', or fail quickly and quietly.
> -		 * When this happens, 'page' will likely just be discarded
> -		 * instead of migrated.
> -		 */
> -		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
> -			__GFP_NOMEMALLOC | GFP_NOWAIT,
> -		.nmask = &allowed_mask,
> -		.reason = MR_DEMOTION,
> -	};
> -
> -	if (list_empty(demote_folios))
> -		return 0;
> -
> -	node_get_allowed_targets(pgdat, &allowed_mask);
> -	mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
> -	if (nodes_empty(allowed_mask))
> -		return 0;
> -
> -	target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
> -	if (target_nid == NUMA_NO_NODE)
> -		/* No lower-tier nodes or nodes were hot-unplugged. */
> -		return 0;
> -
> -	mtc.nid = target_nid;
> -
> -	/* Demotion ignores all cpuset and mempolicy settings */
> -	migrate_pages(demote_folios, alloc_demote_folio, NULL,
> -		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
> -		      &nr_succeeded);
> -
> -	return nr_succeeded;
> -}
> -
>   static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
>   {
>   	if (gfp_mask & __GFP_FS)
> @@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
>   	/* 'folio_list' is always empty here */
>   
>   	/* Migrate folios selected for demotion */
> -	nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
> +	nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
>   	nr_reclaimed += nr_demoted;
>   	stat->nr_demoted += nr_demoted;
>   	/* Folios that could not be demoted are still in @demote_folios */
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Alexandre Ghiti 3 weeks, 4 days ago
Hi Tom,

On 3/12/26 09:44, Donet Tom wrote:
>
> Hi Alexandre
>
> On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
>> Let's have all the demotion functions in this file, no functional
>> change intended.
>>
>> Suggested-by: Gregory Price <gourry@gourry.net>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>>   include/linux/memory-tiers.h | 18 ++++++++
>>   mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
>>   mm/vmscan.c                  | 80 +-----------------------------------
>>   3 files changed, 94 insertions(+), 79 deletions(-)
>>
>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
>> index 96987d9d95a8..0bf0d002939e 100644
>> --- a/include/linux/memory-tiers.h
>> +++ b/include/linux/memory-tiers.h
>> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head 
>> *memory_types);
>>   int next_demotion_node(int node, const nodemask_t *allowed_mask);
>>   void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>>   bool node_is_toptier(int node);
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>> +                  struct pglist_data *pgdat,
>> +                  struct mem_cgroup *memcg);
>>   #else
>>   static inline int next_demotion_node(int node, const nodemask_t 
>> *allowed_mask)
>>   {
>> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>>   {
>>       return true;
>>   }
>> +
>> +static inline unsigned int mt_demote_folios(struct list_head 
>> *demote_folios,
>> +                        struct pglist_data *pgdat,
>> +                        struct mem_cgroup *memcg)
>> +{
>> +    return 0;
>> +}
>> +
>>   #endif
>>     #else
>> @@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
>>       return true;
>>   }
>>   +static inline unsigned int mt_demote_folios(struct list_head 
>> *demote_folios,
>> +                        struct pglist_data *pgdat,
>> +                        struct mem_cgroup *memcg)
>> +{
>> +    return 0;
>> +}
>> +
>>   static inline int register_mt_adistance_algorithm(struct 
>> notifier_block *nb)
>>   {
>>       return 0;
>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
>> index 986f809376eb..afdf21738a54 100644
>> --- a/mm/memory-tiers.c
>> +++ b/mm/memory-tiers.c
>> @@ -7,6 +7,7 @@
>>   #include <linux/memory-tiers.h>
>>   #include <linux/notifier.h>
>>   #include <linux/sched/sysctl.h>
>> +#include <linux/migrate.h>
>>     #include "internal.h"
>>   @@ -373,6 +374,80 @@ int next_demotion_node(int node, const 
>> nodemask_t *allowed_mask)
>>       return find_next_best_node(node, &mask);
>>   }
>>   +static struct folio *alloc_demote_folio(struct folio *src,
>> +                    unsigned long private)
>> +{
>> +    struct folio *dst;
>> +    nodemask_t *allowed_mask;
>> +    struct migration_target_control *mtc;
>> +
>> +    mtc = (struct migration_target_control *)private;
>> +
>> +    allowed_mask = mtc->nmask;
>> +    /*
>> +     * make sure we allocate from the target node first also trying to
>> +     * demote or reclaim pages from the target node via kswapd if we 
>> are
>> +     * low on free memory on target node. If we don't do this and if
>> +     * we have free memory on the slower(lower) memtier, we would start
>> +     * allocating pages from slower(lower) memory tiers without even 
>> forcing
>> +     * a demotion of cold pages from the target memtier. This can 
>> result
>> +     * in the kernel placing hot pages in slower(lower) memory tiers.
>> +     */
>> +    mtc->nmask = NULL;
>> +    mtc->gfp_mask |= __GFP_THISNODE;
>> +    dst = alloc_migration_target(src, (unsigned long)mtc);
>> +    if (dst)
>> +        return dst;
>> +
>> +    mtc->gfp_mask &= ~__GFP_THISNODE;
>> +    mtc->nmask = allowed_mask;
>> +
>> +    return alloc_migration_target(src, (unsigned long)mtc);
>> +}
>> +
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>
>
> Demotion will happen only when different memory tiers are present, 
> right? Since demote_folios() already implies that the folios are being 
> demoted to a lower tier, is the mt_ prefix needed in the function 
> name? I’m fine with keeping it as is, but I just wanted to clarify.


You're right, demote implies some memory tiers. But I like the mt_ 
prefix, some functions in memory-tiers.c already have this prefix so it 
adds consistency: so since you don't mind, I'll keep it :)


>
> Otherwise it LGTM
>
> Reviewed-by: Donet Tom <donettom@linux.ibm.com>


Thanks for your time!

Alex


>
>> +                  struct pglist_data *pgdat,
>> +                  struct mem_cgroup *memcg)
>> +{
>> +    int target_nid;
>> +    unsigned int nr_succeeded;
>> +    nodemask_t allowed_mask;
>> +
>> +    struct migration_target_control mtc = {
>> +        /*
>> +         * Allocate from 'node', or fail quickly and quietly.
>> +         * When this happens, 'page' will likely just be discarded
>> +         * instead of migrated.
>> +         */
>> +        .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
>> +            __GFP_NOMEMALLOC | GFP_NOWAIT,
>> +        .nmask = &allowed_mask,
>> +        .reason = MR_DEMOTION,
>> +    };
>> +
>> +    if (list_empty(demote_folios))
>> +        return 0;
>> +
>> +    node_get_allowed_targets(pgdat, &allowed_mask);
>> +    mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
>> +    if (nodes_empty(allowed_mask))
>> +        return 0;
>> +
>> +    target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
>> +    if (target_nid == NUMA_NO_NODE)
>> +        /* No lower-tier nodes or nodes were hot-unplugged. */
>> +        return 0;
>> +
>> +    mtc.nid = target_nid;
>> +
>> +    /* Demotion ignores all cpuset and mempolicy settings */
>> +    migrate_pages(demote_folios, alloc_demote_folio, NULL,
>> +            (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
>> +            &nr_succeeded);
>> +
>> +    return nr_succeeded;
>> +}
>> +
>>   static void disable_all_demotion_targets(void)
>>   {
>>       struct memory_tier *memtier;
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 0fc9373e8251..5e0138b94480 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct 
>> folio *folio,
>>           mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
>>   }
>>   -static struct folio *alloc_demote_folio(struct folio *src,
>> -        unsigned long private)
>> -{
>> -    struct folio *dst;
>> -    nodemask_t *allowed_mask;
>> -    struct migration_target_control *mtc;
>> -
>> -    mtc = (struct migration_target_control *)private;
>> -
>> -    allowed_mask = mtc->nmask;
>> -    /*
>> -     * make sure we allocate from the target node first also trying to
>> -     * demote or reclaim pages from the target node via kswapd if we 
>> are
>> -     * low on free memory on target node. If we don't do this and if
>> -     * we have free memory on the slower(lower) memtier, we would start
>> -     * allocating pages from slower(lower) memory tiers without even 
>> forcing
>> -     * a demotion of cold pages from the target memtier. This can 
>> result
>> -     * in the kernel placing hot pages in slower(lower) memory tiers.
>> -     */
>> -    mtc->nmask = NULL;
>> -    mtc->gfp_mask |= __GFP_THISNODE;
>> -    dst = alloc_migration_target(src, (unsigned long)mtc);
>> -    if (dst)
>> -        return dst;
>> -
>> -    mtc->gfp_mask &= ~__GFP_THISNODE;
>> -    mtc->nmask = allowed_mask;
>> -
>> -    return alloc_migration_target(src, (unsigned long)mtc);
>> -}
>> -
>> -/*
>> - * Take folios on @demote_folios and attempt to demote them to 
>> another node.
>> - * Folios which are not demoted are left on @demote_folios.
>> - */
>> -static unsigned int demote_folio_list(struct list_head *demote_folios,
>> -                      struct pglist_data *pgdat,
>> -                      struct mem_cgroup *memcg)
>> -{
>> -    int target_nid;
>> -    unsigned int nr_succeeded;
>> -    nodemask_t allowed_mask;
>> -
>> -    struct migration_target_control mtc = {
>> -        /*
>> -         * Allocate from 'node', or fail quickly and quietly.
>> -         * When this happens, 'page' will likely just be discarded
>> -         * instead of migrated.
>> -         */
>> -        .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
>> -            __GFP_NOMEMALLOC | GFP_NOWAIT,
>> -        .nmask = &allowed_mask,
>> -        .reason = MR_DEMOTION,
>> -    };
>> -
>> -    if (list_empty(demote_folios))
>> -        return 0;
>> -
>> -    node_get_allowed_targets(pgdat, &allowed_mask);
>> -    mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
>> -    if (nodes_empty(allowed_mask))
>> -        return 0;
>> -
>> -    target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
>> -    if (target_nid == NUMA_NO_NODE)
>> -        /* No lower-tier nodes or nodes were hot-unplugged. */
>> -        return 0;
>> -
>> -    mtc.nid = target_nid;
>> -
>> -    /* Demotion ignores all cpuset and mempolicy settings */
>> -    migrate_pages(demote_folios, alloc_demote_folio, NULL,
>> -              (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
>> -              &nr_succeeded);
>> -
>> -    return nr_succeeded;
>> -}
>> -
>>   static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
>>   {
>>       if (gfp_mask & __GFP_FS)
>> @@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct 
>> list_head *folio_list,
>>       /* 'folio_list' is always empty here */
>>         /* Migrate folios selected for demotion */
>> -    nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
>> +    nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
>>       nr_reclaimed += nr_demoted;
>>       stat->nr_demoted += nr_demoted;
>>       /* Folios that could not be demoted are still in @demote_folios */
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Joshua Hahn 3 weeks, 6 days ago
On Wed, 11 Mar 2026 12:02:40 +0100 Alexandre Ghiti <alex@ghiti.fr> wrote:

> Let's have all the demotion functions in this file, no functional
> change intended.

Hi Alexandre,

I hope you are doing well! Thank you for the patch.

Makes sense to move the migration functions together. Just one small
nit, I think the following comment is pretty helpful in understanding
that folios that aren't demoted still remain in @demote_folios. Should
we also move this comment to memory-tiers.c?

[...snip...]

> -/*
> - * Take folios on @demote_folios and attempt to demote them to another node.
> - * Folios which are not demoted are left on @demote_folios.
> - */
> -static unsigned int demote_folio_list(struct list_head *demote_folios,
> -				      struct pglist_data *pgdat,
> -				      struct mem_cgroup *memcg)
> -{
> -	int target_nid;
> -	unsigned int nr_succeeded;
> -	nodemask_t allowed_mask;
> -

[...snip...]

Anyways, the rest looks good to me. Have a great day!
Joshua
Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
Posted by Alexandre Ghiti 3 weeks, 4 days ago
Hi Joshua,

On 3/11/26 15:55, Joshua Hahn wrote:
> On Wed, 11 Mar 2026 12:02:40 +0100 Alexandre Ghiti <alex@ghiti.fr> wrote:
>
>> Let's have all the demotion functions in this file, no functional
>> change intended.
> Hi Alexandre,
>
> I hope you are doing well! Thank you for the patch.
>
> Makes sense to move the migration functions together. Just one small
> nit, I think the following comment is pretty helpful in understanding
> that folios that aren't demoted still remain in @demote_folios. Should
> we also move this comment to memory-tiers.c?


You're totally right, my bad! I'll add the comment in the next version.


>
> [...snip...]
>
>> -/*
>> - * Take folios on @demote_folios and attempt to demote them to another node.
>> - * Folios which are not demoted are left on @demote_folios.
>> - */
>> -static unsigned int demote_folio_list(struct list_head *demote_folios,
>> -				      struct pglist_data *pgdat,
>> -				      struct mem_cgroup *memcg)
>> -{
>> -	int target_nid;
>> -	unsigned int nr_succeeded;
>> -	nodemask_t allowed_mask;
>> -
> [...snip...]
>
> Anyways, the rest looks good to me. Have a great day!
> Joshua


Thanks!

Alex