Let's have all the demotion functions in this file, no functional
change intended.
Suggested-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
---
include/linux/memory-tiers.h | 18 ++++++++
mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
mm/vmscan.c | 80 +-----------------------------------
3 files changed, 94 insertions(+), 79 deletions(-)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 96987d9d95a8..0bf0d002939e 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
int next_demotion_node(int node, const nodemask_t *allowed_mask);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
bool node_is_toptier(int node);
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg);
#else
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
{
@@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
{
return true;
}
+
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
#endif
#else
@@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
return true;
}
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
{
return 0;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 986f809376eb..afdf21738a54 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -7,6 +7,7 @@
#include <linux/memory-tiers.h>
#include <linux/notifier.h>
#include <linux/sched/sysctl.h>
+#include <linux/migrate.h>
#include "internal.h"
@@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
return find_next_best_node(node, &mask);
}
+static struct folio *alloc_demote_folio(struct folio *src,
+ unsigned long private)
+{
+ struct folio *dst;
+ nodemask_t *allowed_mask;
+ struct migration_target_control *mtc;
+
+ mtc = (struct migration_target_control *)private;
+
+ allowed_mask = mtc->nmask;
+ /*
+ * make sure we allocate from the target node first also trying to
+ * demote or reclaim pages from the target node via kswapd if we are
+ * low on free memory on target node. If we don't do this and if
+ * we have free memory on the slower(lower) memtier, we would start
+ * allocating pages from slower(lower) memory tiers without even forcing
+ * a demotion of cold pages from the target memtier. This can result
+ * in the kernel placing hot pages in slower(lower) memory tiers.
+ */
+ mtc->nmask = NULL;
+ mtc->gfp_mask |= __GFP_THISNODE;
+ dst = alloc_migration_target(src, (unsigned long)mtc);
+ if (dst)
+ return dst;
+
+ mtc->gfp_mask &= ~__GFP_THISNODE;
+ mtc->nmask = allowed_mask;
+
+ return alloc_migration_target(src, (unsigned long)mtc);
+}
+
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ int target_nid;
+ unsigned int nr_succeeded;
+ nodemask_t allowed_mask;
+
+ struct migration_target_control mtc = {
+ /*
+ * Allocate from 'node', or fail quickly and quietly.
+ * When this happens, 'page' will likely just be discarded
+ * instead of migrated.
+ */
+ .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+ __GFP_NOMEMALLOC | GFP_NOWAIT,
+ .nmask = &allowed_mask,
+ .reason = MR_DEMOTION,
+ };
+
+ if (list_empty(demote_folios))
+ return 0;
+
+ node_get_allowed_targets(pgdat, &allowed_mask);
+ mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
+ if (nodes_empty(allowed_mask))
+ return 0;
+
+ target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
+ if (target_nid == NUMA_NO_NODE)
+ /* No lower-tier nodes or nodes were hot-unplugged. */
+ return 0;
+
+ mtc.nid = target_nid;
+
+ /* Demotion ignores all cpuset and mempolicy settings */
+ migrate_pages(demote_folios, alloc_demote_folio, NULL,
+ (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
+ &nr_succeeded);
+
+ return nr_succeeded;
+}
+
static void disable_all_demotion_targets(void)
{
struct memory_tier *memtier;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fc9373e8251..5e0138b94480 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
}
-static struct folio *alloc_demote_folio(struct folio *src,
- unsigned long private)
-{
- struct folio *dst;
- nodemask_t *allowed_mask;
- struct migration_target_control *mtc;
-
- mtc = (struct migration_target_control *)private;
-
- allowed_mask = mtc->nmask;
- /*
- * make sure we allocate from the target node first also trying to
- * demote or reclaim pages from the target node via kswapd if we are
- * low on free memory on target node. If we don't do this and if
- * we have free memory on the slower(lower) memtier, we would start
- * allocating pages from slower(lower) memory tiers without even forcing
- * a demotion of cold pages from the target memtier. This can result
- * in the kernel placing hot pages in slower(lower) memory tiers.
- */
- mtc->nmask = NULL;
- mtc->gfp_mask |= __GFP_THISNODE;
- dst = alloc_migration_target(src, (unsigned long)mtc);
- if (dst)
- return dst;
-
- mtc->gfp_mask &= ~__GFP_THISNODE;
- mtc->nmask = allowed_mask;
-
- return alloc_migration_target(src, (unsigned long)mtc);
-}
-
-/*
- * Take folios on @demote_folios and attempt to demote them to another node.
- * Folios which are not demoted are left on @demote_folios.
- */
-static unsigned int demote_folio_list(struct list_head *demote_folios,
- struct pglist_data *pgdat,
- struct mem_cgroup *memcg)
-{
- int target_nid;
- unsigned int nr_succeeded;
- nodemask_t allowed_mask;
-
- struct migration_target_control mtc = {
- /*
- * Allocate from 'node', or fail quickly and quietly.
- * When this happens, 'page' will likely just be discarded
- * instead of migrated.
- */
- .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
- __GFP_NOMEMALLOC | GFP_NOWAIT,
- .nmask = &allowed_mask,
- .reason = MR_DEMOTION,
- };
-
- if (list_empty(demote_folios))
- return 0;
-
- node_get_allowed_targets(pgdat, &allowed_mask);
- mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
- if (nodes_empty(allowed_mask))
- return 0;
-
- target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
- if (target_nid == NUMA_NO_NODE)
- /* No lower-tier nodes or nodes were hot-unplugged. */
- return 0;
-
- mtc.nid = target_nid;
-
- /* Demotion ignores all cpuset and mempolicy settings */
- migrate_pages(demote_folios, alloc_demote_folio, NULL,
- (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
- &nr_succeeded);
-
- return nr_succeeded;
-}
-
static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
{
if (gfp_mask & __GFP_FS)
@@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
/* 'folio_list' is always empty here */
/* Migrate folios selected for demotion */
- nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
+ nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
nr_reclaimed += nr_demoted;
stat->nr_demoted += nr_demoted;
/* Folios that could not be demoted are still in @demote_folios */
--
2.53.0
On 3/11/26 12:02, Alexandre Ghiti wrote:
> Let's have all the demotion functions in this file, no functional
> change intended.
>
> Suggested-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> ---
> include/linux/memory-tiers.h | 18 ++++++++
> mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
> mm/vmscan.c | 80 +-----------------------------------
> 3 files changed, 94 insertions(+), 79 deletions(-)
>
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 96987d9d95a8..0bf0d002939e 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
> int next_demotion_node(int node, const nodemask_t *allowed_mask);
> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
> bool node_is_toptier(int node);
> +unsigned int mt_demote_folios(struct list_head *demote_folios,
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg);
> #else
> static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
> {
> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
> {
> return true;
> }
> +
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg)
use two-tab indentation on second parameter line please. So this fits
into a single line. Same for the other functions.
Just like alloc_demote_folio() that you are moving already did.
[...]
> -static struct folio *alloc_demote_folio(struct folio *src,
> - unsigned long private)
> -{
> - struct folio *dst;
> - nodemask_t *allowed_mask;
> - struct migration_target_control *mtc;
> -
> - mtc = (struct migration_target_control *)private;
> -
> - allowed_mask = mtc->nmask;
> - /*
> - * make sure we allocate from the target node first also trying to
> - * demote or reclaim pages from the target node via kswapd if we are
> - * low on free memory on target node. If we don't do this and if
> - * we have free memory on the slower(lower) memtier, we would start
> - * allocating pages from slower(lower) memory tiers without even forcing
> - * a demotion of cold pages from the target memtier. This can result
> - * in the kernel placing hot pages in slower(lower) memory tiers.
> - */
> - mtc->nmask = NULL;
> - mtc->gfp_mask |= __GFP_THISNODE;
> - dst = alloc_migration_target(src, (unsigned long)mtc);
> - if (dst)
> - return dst;
> -
> - mtc->gfp_mask &= ~__GFP_THISNODE;
> - mtc->nmask = allowed_mask;
> -
I think this function changed in the meantime in mm/mm-unstable. Against
which branch is this patch?
--
Cheers,
David
Hi David,
On 3/12/26 13:56, David Hildenbrand (Arm) wrote:
> On 3/11/26 12:02, Alexandre Ghiti wrote:
>> Let's have all the demotion functions in this file, no functional
>> change intended.
>>
>> Suggested-by: Gregory Price <gourry@gourry.net>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>> include/linux/memory-tiers.h | 18 ++++++++
>> mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
>> mm/vmscan.c | 80 +-----------------------------------
>> 3 files changed, 94 insertions(+), 79 deletions(-)
>>
>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
>> index 96987d9d95a8..0bf0d002939e 100644
>> --- a/include/linux/memory-tiers.h
>> +++ b/include/linux/memory-tiers.h
>> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
>> int next_demotion_node(int node, const nodemask_t *allowed_mask);
>> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>> bool node_is_toptier(int node);
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg);
>> #else
>> static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
>> {
>> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>> {
>> return true;
>> }
>> +
>> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg)
> use two-tab indentation on second parameter line please. So this fits
> into a single line. Same for the other functions.
>
> Just like alloc_demote_folio() that you are moving already did.
Will do.
>
> [...]
>
>> -static struct folio *alloc_demote_folio(struct folio *src,
>> - unsigned long private)
>> -{
>> - struct folio *dst;
>> - nodemask_t *allowed_mask;
>> - struct migration_target_control *mtc;
>> -
>> - mtc = (struct migration_target_control *)private;
>> -
>> - allowed_mask = mtc->nmask;
>> - /*
>> - * make sure we allocate from the target node first also trying to
>> - * demote or reclaim pages from the target node via kswapd if we are
>> - * low on free memory on target node. If we don't do this and if
>> - * we have free memory on the slower(lower) memtier, we would start
>> - * allocating pages from slower(lower) memory tiers without even forcing
>> - * a demotion of cold pages from the target memtier. This can result
>> - * in the kernel placing hot pages in slower(lower) memory tiers.
>> - */
>> - mtc->nmask = NULL;
>> - mtc->gfp_mask |= __GFP_THISNODE;
>> - dst = alloc_migration_target(src, (unsigned long)mtc);
>> - if (dst)
>> - return dst;
>> -
>> - mtc->gfp_mask &= ~__GFP_THISNODE;
>> - mtc->nmask = allowed_mask;
>> -
> I think this function changed in the meantime in mm/mm-unstable. Against
> which branch is this patch?
Against Linus' v7.0-rc3. I have just checked and you're right, I missed
this modification, I'll rebase onto mm-unstable.
Thanks,
Alex
>
Hi Alexandre,
On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
> Let's have all the demotion functions in this file, no functional
> change intended.
>
> Suggested-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> ---
> include/linux/memory-tiers.h | 18 ++++++++
> mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
> mm/vmscan.c | 80 +-----------------------------------
> 3 files changed, 94 insertions(+), 79 deletions(-)
>
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 96987d9d95a8..0bf0d002939e 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
> int next_demotion_node(int node, const nodemask_t *allowed_mask);
> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
> bool node_is_toptier(int node);
> +unsigned int mt_demote_folios(struct list_head *demote_folios,
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg);
> #else
> static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
> {
> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
> {
> return true;
> }
> +
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg)
> +{
> + return 0;
> +}
> +
> #endif
>
> #else
> @@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
> return true;
> }
>
> +static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg)
> +{
> + return 0;
> +}
> +
> static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
> {
> return 0;
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index 986f809376eb..afdf21738a54 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -7,6 +7,7 @@
> #include <linux/memory-tiers.h>
> #include <linux/notifier.h>
> #include <linux/sched/sysctl.h>
> +#include <linux/migrate.h>
>
> #include "internal.h"
>
> @@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
> return find_next_best_node(node, &mask);
> }
>
> +static struct folio *alloc_demote_folio(struct folio *src,
> + unsigned long private)
> +{
> + struct folio *dst;
> + nodemask_t *allowed_mask;
> + struct migration_target_control *mtc;
> +
> + mtc = (struct migration_target_control *)private;
> +
> + allowed_mask = mtc->nmask;
> + /*
> + * make sure we allocate from the target node first also trying to
> + * demote or reclaim pages from the target node via kswapd if we are
> + * low on free memory on target node. If we don't do this and if
> + * we have free memory on the slower(lower) memtier, we would start
> + * allocating pages from slower(lower) memory tiers without even forcing
> + * a demotion of cold pages from the target memtier. This can result
> + * in the kernel placing hot pages in slower(lower) memory tiers.
> + */
> + mtc->nmask = NULL;
> + mtc->gfp_mask |= __GFP_THISNODE;
> + dst = alloc_migration_target(src, (unsigned long)mtc);
> + if (dst)
> + return dst;
> +
> + mtc->gfp_mask &= ~__GFP_THISNODE;
> + mtc->nmask = allowed_mask;
> +
> + return alloc_migration_target(src, (unsigned long)mtc);
> +}
> +
> +unsigned int mt_demote_folios(struct list_head *demote_folios,
Demotion will happen only when different memory tiers are present,
right? Since demote_folios() already implies that the folios are being
demoted to a lower tier, is the mt_ prefix needed in the function name?
I’m fine with keeping it as is, but I just wanted to clarify.
Otherwise it LGTM
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
> + struct pglist_data *pgdat,
> + struct mem_cgroup *memcg)
> +{
> + int target_nid;
> + unsigned int nr_succeeded;
> + nodemask_t allowed_mask;
> +
> + struct migration_target_control mtc = {
> + /*
> + * Allocate from 'node', or fail quickly and quietly.
> + * When this happens, 'page' will likely just be discarded
> + * instead of migrated.
> + */
> + .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
> + __GFP_NOMEMALLOC | GFP_NOWAIT,
> + .nmask = &allowed_mask,
> + .reason = MR_DEMOTION,
> + };
> +
> + if (list_empty(demote_folios))
> + return 0;
> +
> + node_get_allowed_targets(pgdat, &allowed_mask);
> + mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
> + if (nodes_empty(allowed_mask))
> + return 0;
> +
> + target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
> + if (target_nid == NUMA_NO_NODE)
> + /* No lower-tier nodes or nodes were hot-unplugged. */
> + return 0;
> +
> + mtc.nid = target_nid;
> +
> + /* Demotion ignores all cpuset and mempolicy settings */
> + migrate_pages(demote_folios, alloc_demote_folio, NULL,
> + (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
> + &nr_succeeded);
> +
> + return nr_succeeded;
> +}
> +
> static void disable_all_demotion_targets(void)
> {
> struct memory_tier *memtier;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 0fc9373e8251..5e0138b94480 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
> mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
> }
>
> -static struct folio *alloc_demote_folio(struct folio *src,
> - unsigned long private)
> -{
> - struct folio *dst;
> - nodemask_t *allowed_mask;
> - struct migration_target_control *mtc;
> -
> - mtc = (struct migration_target_control *)private;
> -
> - allowed_mask = mtc->nmask;
> - /*
> - * make sure we allocate from the target node first also trying to
> - * demote or reclaim pages from the target node via kswapd if we are
> - * low on free memory on target node. If we don't do this and if
> - * we have free memory on the slower(lower) memtier, we would start
> - * allocating pages from slower(lower) memory tiers without even forcing
> - * a demotion of cold pages from the target memtier. This can result
> - * in the kernel placing hot pages in slower(lower) memory tiers.
> - */
> - mtc->nmask = NULL;
> - mtc->gfp_mask |= __GFP_THISNODE;
> - dst = alloc_migration_target(src, (unsigned long)mtc);
> - if (dst)
> - return dst;
> -
> - mtc->gfp_mask &= ~__GFP_THISNODE;
> - mtc->nmask = allowed_mask;
> -
> - return alloc_migration_target(src, (unsigned long)mtc);
> -}
> -
> -/*
> - * Take folios on @demote_folios and attempt to demote them to another node.
> - * Folios which are not demoted are left on @demote_folios.
> - */
> -static unsigned int demote_folio_list(struct list_head *demote_folios,
> - struct pglist_data *pgdat,
> - struct mem_cgroup *memcg)
> -{
> - int target_nid;
> - unsigned int nr_succeeded;
> - nodemask_t allowed_mask;
> -
> - struct migration_target_control mtc = {
> - /*
> - * Allocate from 'node', or fail quickly and quietly.
> - * When this happens, 'page' will likely just be discarded
> - * instead of migrated.
> - */
> - .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
> - __GFP_NOMEMALLOC | GFP_NOWAIT,
> - .nmask = &allowed_mask,
> - .reason = MR_DEMOTION,
> - };
> -
> - if (list_empty(demote_folios))
> - return 0;
> -
> - node_get_allowed_targets(pgdat, &allowed_mask);
> - mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
> - if (nodes_empty(allowed_mask))
> - return 0;
> -
> - target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
> - if (target_nid == NUMA_NO_NODE)
> - /* No lower-tier nodes or nodes were hot-unplugged. */
> - return 0;
> -
> - mtc.nid = target_nid;
> -
> - /* Demotion ignores all cpuset and mempolicy settings */
> - migrate_pages(demote_folios, alloc_demote_folio, NULL,
> - (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
> - &nr_succeeded);
> -
> - return nr_succeeded;
> -}
> -
> static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
> {
> if (gfp_mask & __GFP_FS)
> @@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
> /* 'folio_list' is always empty here */
>
> /* Migrate folios selected for demotion */
> - nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
> + nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
> nr_reclaimed += nr_demoted;
> stat->nr_demoted += nr_demoted;
> /* Folios that could not be demoted are still in @demote_folios */
Hi Tom,
On 3/12/26 09:44, Donet Tom wrote:
>
> Hi Alexander
>
> On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
>> Let's have all the demotion functions in this file, no functional
>> change intended.
>>
>> Suggested-by: Gregory Price <gourry@gourry.net>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>> include/linux/memory-tiers.h | 18 ++++++++
>> mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
>> mm/vmscan.c | 80 +-----------------------------------
>> 3 files changed, 94 insertions(+), 79 deletions(-)
>>
>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
>> index 96987d9d95a8..0bf0d002939e 100644
>> --- a/include/linux/memory-tiers.h
>> +++ b/include/linux/memory-tiers.h
>> @@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head
>> *memory_types);
>> int next_demotion_node(int node, const nodemask_t *allowed_mask);
>> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>> bool node_is_toptier(int node);
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg);
>> #else
>> static inline int next_demotion_node(int node, const nodemask_t
>> *allowed_mask)
>> {
>> @@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
>> {
>> return true;
>> }
>> +
>> +static inline unsigned int mt_demote_folios(struct list_head
>> *demote_folios,
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg)
>> +{
>> + return 0;
>> +}
>> +
>> #endif
>> #else
>> @@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
>> return true;
>> }
>> +static inline unsigned int mt_demote_folios(struct list_head
>> *demote_folios,
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg)
>> +{
>> + return 0;
>> +}
>> +
>> static inline int register_mt_adistance_algorithm(struct
>> notifier_block *nb)
>> {
>> return 0;
>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
>> index 986f809376eb..afdf21738a54 100644
>> --- a/mm/memory-tiers.c
>> +++ b/mm/memory-tiers.c
>> @@ -7,6 +7,7 @@
>> #include <linux/memory-tiers.h>
>> #include <linux/notifier.h>
>> #include <linux/sched/sysctl.h>
>> +#include <linux/migrate.h>
>> #include "internal.h"
>> @@ -373,6 +374,80 @@ int next_demotion_node(int node, const
>> nodemask_t *allowed_mask)
>> return find_next_best_node(node, &mask);
>> }
>> +static struct folio *alloc_demote_folio(struct folio *src,
>> + unsigned long private)
>> +{
>> + struct folio *dst;
>> + nodemask_t *allowed_mask;
>> + struct migration_target_control *mtc;
>> +
>> + mtc = (struct migration_target_control *)private;
>> +
>> + allowed_mask = mtc->nmask;
>> + /*
>> + * make sure we allocate from the target node first also trying to
>> + * demote or reclaim pages from the target node via kswapd if we
>> are
>> + * low on free memory on target node. If we don't do this and if
>> + * we have free memory on the slower(lower) memtier, we would start
>> + * allocating pages from slower(lower) memory tiers without even
>> forcing
>> + * a demotion of cold pages from the target memtier. This can
>> result
>> + * in the kernel placing hot pages in slower(lower) memory tiers.
>> + */
>> + mtc->nmask = NULL;
>> + mtc->gfp_mask |= __GFP_THISNODE;
>> + dst = alloc_migration_target(src, (unsigned long)mtc);
>> + if (dst)
>> + return dst;
>> +
>> + mtc->gfp_mask &= ~__GFP_THISNODE;
>> + mtc->nmask = allowed_mask;
>> +
>> + return alloc_migration_target(src, (unsigned long)mtc);
>> +}
>> +
>> +unsigned int mt_demote_folios(struct list_head *demote_folios,
>
>
> Demotion will happen only when different memory tiers are present,
> right? Since demote_folios() already implies that the folios are being
> demoted to a lower tier, is the mt_ prefix needed in the function
> name? I’m fine with keeping it as is, but I just wanted to clarify.
You're right, demote implies some memory tiers. But I like the mt_
prefix, some functions in memory-tiers.c already have this prefix so it
adds consistency: so since you don't mind, I'll keep it :)
>
> Otherwise it LGTM
>
> Reviewed-by: Donet Tom <donettom@linux.ibm.com>
Thanks for your time!
Alex
>
>> + struct pglist_data *pgdat,
>> + struct mem_cgroup *memcg)
>> +{
>> + int target_nid;
>> + unsigned int nr_succeeded;
>> + nodemask_t allowed_mask;
>> +
>> + struct migration_target_control mtc = {
>> + /*
>> + * Allocate from 'node', or fail quickly and quietly.
>> + * When this happens, 'page' will likely just be discarded
>> + * instead of migrated.
>> + */
>> + .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
>> + __GFP_NOMEMALLOC | GFP_NOWAIT,
>> + .nmask = &allowed_mask,
>> + .reason = MR_DEMOTION,
>> + };
>> +
>> + if (list_empty(demote_folios))
>> + return 0;
>> +
>> + node_get_allowed_targets(pgdat, &allowed_mask);
>> + mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
>> + if (nodes_empty(allowed_mask))
>> + return 0;
>> +
>> + target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
>> + if (target_nid == NUMA_NO_NODE)
>> + /* No lower-tier nodes or nodes were hot-unplugged. */
>> + return 0;
>> +
>> + mtc.nid = target_nid;
>> +
>> + /* Demotion ignores all cpuset and mempolicy settings */
>> + migrate_pages(demote_folios, alloc_demote_folio, NULL,
>> + (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
>> + &nr_succeeded);
>> +
>> + return nr_succeeded;
>> +}
>> +
>> static void disable_all_demotion_targets(void)
>> {
>> struct memory_tier *memtier;
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 0fc9373e8251..5e0138b94480 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct
>> folio *folio,
>> mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
>> }
>> -static struct folio *alloc_demote_folio(struct folio *src,
>> - unsigned long private)
>> -{
>> - struct folio *dst;
>> - nodemask_t *allowed_mask;
>> - struct migration_target_control *mtc;
>> -
>> - mtc = (struct migration_target_control *)private;
>> -
>> - allowed_mask = mtc->nmask;
>> - /*
>> - * make sure we allocate from the target node first also trying to
>> - * demote or reclaim pages from the target node via kswapd if we
>> are
>> - * low on free memory on target node. If we don't do this and if
>> - * we have free memory on the slower(lower) memtier, we would start
>> - * allocating pages from slower(lower) memory tiers without even
>> forcing
>> - * a demotion of cold pages from the target memtier. This can
>> result
>> - * in the kernel placing hot pages in slower(lower) memory tiers.
>> - */
>> - mtc->nmask = NULL;
>> - mtc->gfp_mask |= __GFP_THISNODE;
>> - dst = alloc_migration_target(src, (unsigned long)mtc);
>> - if (dst)
>> - return dst;
>> -
>> - mtc->gfp_mask &= ~__GFP_THISNODE;
>> - mtc->nmask = allowed_mask;
>> -
>> - return alloc_migration_target(src, (unsigned long)mtc);
>> -}
>> -
>> -/*
>> - * Take folios on @demote_folios and attempt to demote them to
>> another node.
>> - * Folios which are not demoted are left on @demote_folios.
>> - */
>> -static unsigned int demote_folio_list(struct list_head *demote_folios,
>> - struct pglist_data *pgdat,
>> - struct mem_cgroup *memcg)
>> -{
>> - int target_nid;
>> - unsigned int nr_succeeded;
>> - nodemask_t allowed_mask;
>> -
>> - struct migration_target_control mtc = {
>> - /*
>> - * Allocate from 'node', or fail quickly and quietly.
>> - * When this happens, 'page' will likely just be discarded
>> - * instead of migrated.
>> - */
>> - .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
>> - __GFP_NOMEMALLOC | GFP_NOWAIT,
>> - .nmask = &allowed_mask,
>> - .reason = MR_DEMOTION,
>> - };
>> -
>> - if (list_empty(demote_folios))
>> - return 0;
>> -
>> - node_get_allowed_targets(pgdat, &allowed_mask);
>> - mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
>> - if (nodes_empty(allowed_mask))
>> - return 0;
>> -
>> - target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
>> - if (target_nid == NUMA_NO_NODE)
>> - /* No lower-tier nodes or nodes were hot-unplugged. */
>> - return 0;
>> -
>> - mtc.nid = target_nid;
>> -
>> - /* Demotion ignores all cpuset and mempolicy settings */
>> - migrate_pages(demote_folios, alloc_demote_folio, NULL,
>> - (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
>> - &nr_succeeded);
>> -
>> - return nr_succeeded;
>> -}
>> -
>> static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
>> {
>> if (gfp_mask & __GFP_FS)
>> @@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct
>> list_head *folio_list,
>> /* 'folio_list' is always empty here */
>> /* Migrate folios selected for demotion */
>> - nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
>> + nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
>> nr_reclaimed += nr_demoted;
>> stat->nr_demoted += nr_demoted;
>> /* Folios that could not be demoted are still in @demote_folios */
On Wed, 11 Mar 2026 12:02:40 +0100 Alexandre Ghiti <alex@ghiti.fr> wrote:
> Let's have all the demotion functions in this file, no functional
> change intended.
Hi Alexandre,
I hope you are doing well! Thank you for the patch.
Makes sense to move the migration functions together. Just one small
nit, I think the following comment is pretty helpful in understanding
that folios that aren't demoted still remain in @demote_folios. Should
we also move this comment to memory-tiers.c?
[...snip...]
> -/*
> - * Take folios on @demote_folios and attempt to demote them to another node.
> - * Folios which are not demoted are left on @demote_folios.
> - */
> -static unsigned int demote_folio_list(struct list_head *demote_folios,
> - struct pglist_data *pgdat,
> - struct mem_cgroup *memcg)
> -{
> - int target_nid;
> - unsigned int nr_succeeded;
> - nodemask_t allowed_mask;
> -
[...snip...]
Anyways, the rest looks good to me. Have a great day!
Joshua
Hi Joshua,
On 3/11/26 15:55, Joshua Hahn wrote:
> On Wed, 11 Mar 2026 12:02:40 +0100 Alexandre Ghiti <alex@ghiti.fr> wrote:
>
>> Let's have all the demotion functions in this file, no functional
>> change intended.
> Hi Alexandre,
>
> I hope you are doing well! Thank you for the patch.
>
> Makes sense to move the migration functions together. Just one small
> nit, I think the following comment is pretty helpful in understanding
> that folios that aren't demoted still remain in @demote_folios. Should
> we also move this comment to memory-tiers.c?
You're totally right, my bad! I'll add the comment in the next version.
>
> [...snip...]
>
>> -/*
>> - * Take folios on @demote_folios and attempt to demote them to another node.
>> - * Folios which are not demoted are left on @demote_folios.
>> - */
>> -static unsigned int demote_folio_list(struct list_head *demote_folios,
>> - struct pglist_data *pgdat,
>> - struct mem_cgroup *memcg)
>> -{
>> - int target_nid;
>> - unsigned int nr_succeeded;
>> - nodemask_t allowed_mask;
>> -
> [...snip...]
>
> Anyways, the rest looks good to me. Have a great day!
> Joshua
Thanks!
Alex
© 2016 - 2026 Red Hat, Inc.