[PATCH 2/4] mm/memcg: make memory.reclaim interface generic

Davidlohr Bueso posted 4 patches 3 months, 2 weeks ago
Only 3 patches received!
[PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Davidlohr Bueso 3 months, 2 weeks ago
This adds a general call for both parsing as well as the
common reclaim semantics. memcg is still the only user and
no change in semantics.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 mm/internal.h   |  2 +
 mm/memcontrol.c | 77 ++------------------------------------
 mm/vmscan.c     | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 73 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 3823fb356d3b..fc4262262b31 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -517,6 +517,8 @@ extern unsigned long highest_memmap_pfn;
 bool folio_isolate_lru(struct folio *folio);
 void folio_putback_lru(struct folio *folio);
 extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
+int user_proactive_reclaim(char *buf,
+			   struct mem_cgroup *memcg, pg_data_t *pgdat);
 
 /*
  * in mm/rmap.c:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 902da8a9c643..015e406eadfa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -51,7 +51,6 @@
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
-#include <linux/parser.h>
 #include <linux/vmpressure.h>
 #include <linux/memremap.h>
 #include <linux/mm_inline.h>
@@ -4566,83 +4565,15 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
-enum {
-	MEMORY_RECLAIM_SWAPPINESS = 0,
-	MEMORY_RECLAIM_SWAPPINESS_MAX,
-	MEMORY_RECLAIM_NULL,
-};
-
-static const match_table_t tokens = {
-	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
-	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
-	{ MEMORY_RECLAIM_NULL, NULL },
-};
-
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
-	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
-	unsigned long nr_to_reclaim, nr_reclaimed = 0;
-	int swappiness = -1;
-	unsigned int reclaim_options;
-	char *old_buf, *start;
-	substring_t args[MAX_OPT_ARGS];
-
-	buf = strstrip(buf);
-
-	old_buf = buf;
-	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
-	if (buf == old_buf)
-		return -EINVAL;
-
-	buf = strstrip(buf);
-
-	while ((start = strsep(&buf, " ")) != NULL) {
-		if (!strlen(start))
-			continue;
-		switch (match_token(start, tokens, args)) {
-		case MEMORY_RECLAIM_SWAPPINESS:
-			if (match_int(&args[0], &swappiness))
-				return -EINVAL;
-			if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
-				return -EINVAL;
-			break;
-		case MEMORY_RECLAIM_SWAPPINESS_MAX:
-			swappiness = SWAPPINESS_ANON_ONLY;
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
-	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
-	while (nr_reclaimed < nr_to_reclaim) {
-		/* Will converge on zero, but reclaim enforces a minimum */
-		unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
-		unsigned long reclaimed;
-
-		if (signal_pending(current))
-			return -EINTR;
-
-		/*
-		 * This is the final attempt, drain percpu lru caches in the
-		 * hope of introducing more evictable pages for
-		 * try_to_free_mem_cgroup_pages().
-		 */
-		if (!nr_retries)
-			lru_add_drain_all();
-
-		reclaimed = try_to_free_mem_cgroup_pages(memcg,
-					batch_size, GFP_KERNEL,
-					reclaim_options,
-					swappiness == -1 ? NULL : &swappiness);
-
-		if (!reclaimed && !nr_retries--)
-			return -EAGAIN;
+	int ret;
 
-		nr_reclaimed += reclaimed;
-	}
+	ret = user_proactive_reclaim(buf, memcg, NULL);
+	if (ret)
+		return ret;
 
 	return nbytes;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c13c01eb0b42..63ddec550c3b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -57,6 +57,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
 #include <linux/mmu_notifier.h>
+#include <linux/parser.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -6714,6 +6715,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 
 	return nr_reclaimed;
 }
+#else
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+					   unsigned long nr_pages,
+					   gfp_t gfp_mask,
+					   unsigned int reclaim_options,
+					   int *swappiness)
+{
+	return 0;
+}
 #endif
 
 static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
@@ -7708,6 +7718,94 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 
 	return ret;
 }
+
+enum {
+	MEMORY_RECLAIM_SWAPPINESS = 0,
+	MEMORY_RECLAIM_SWAPPINESS_MAX,
+	MEMORY_RECLAIM_NULL,
+};
+static const match_table_t tokens = {
+	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
+	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
+	{ MEMORY_RECLAIM_NULL, NULL },
+};
+
+int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
+{
+	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
+	unsigned long nr_to_reclaim, nr_reclaimed = 0;
+	int swappiness = -1;
+	char *old_buf, *start;
+	substring_t args[MAX_OPT_ARGS];
+
+	if (!buf || (!memcg && !pgdat))
+		return -EINVAL;
+
+	buf = strstrip(buf);
+
+	old_buf = buf;
+	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
+	if (buf == old_buf)
+		return -EINVAL;
+
+	buf = strstrip(buf);
+
+	while ((start = strsep(&buf, " ")) != NULL) {
+		if (!strlen(start))
+			continue;
+		switch (match_token(start, tokens, args)) {
+		case MEMORY_RECLAIM_SWAPPINESS:
+			if (match_int(&args[0], &swappiness))
+				return -EINVAL;
+			if (swappiness < MIN_SWAPPINESS ||
+			    swappiness > MAX_SWAPPINESS)
+				return -EINVAL;
+			break;
+		case MEMORY_RECLAIM_SWAPPINESS_MAX:
+			swappiness = SWAPPINESS_ANON_ONLY;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	while (nr_reclaimed < nr_to_reclaim) {
+		/* Will converge on zero, but reclaim enforces a minimum */
+		unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
+		unsigned long reclaimed;
+
+		if (signal_pending(current))
+			return -EINTR;
+
+		/*
+		 * This is the final attempt, drain percpu lru caches in the
+		 * hope of introducing more evictable pages.
+		 */
+		if (!nr_retries)
+			lru_add_drain_all();
+
+		if (memcg) {
+			unsigned int reclaim_options;
+
+			reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
+					  MEMCG_RECLAIM_PROACTIVE;
+			reclaimed = try_to_free_mem_cgroup_pages(memcg,
+						 batch_size, GFP_KERNEL,
+						 reclaim_options,
+						 swappiness == -1 ? NULL : &swappiness);
+		} else {
+			return -EINVAL;
+		}
+
+		if (!reclaimed && !nr_retries--)
+			return -EAGAIN;
+
+		nr_reclaimed += reclaimed;
+	}
+
+	return 0;
+}
+
 #endif
 
 /**
-- 
2.39.5
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Shakeel Butt 2 months, 3 weeks ago
On Mon, Jun 23, 2025 at 11:58:49AM -0700, Davidlohr Bueso wrote:
> +
> +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
> +{
> +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> +	int swappiness = -1;
> +	char *old_buf, *start;
> +	substring_t args[MAX_OPT_ARGS];
> +
> +	if (!buf || (!memcg && !pgdat))

I don't think this series is adding a use-case where both memcg and
pgdat are non-NULL, so let's error out on that as well.
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Andrew Morton 2 months, 3 weeks ago
On Thu, 17 Jul 2025 15:17:09 -0700 Shakeel Butt <shakeel.butt@linux.dev> wrote:

> On Mon, Jun 23, 2025 at 11:58:49AM -0700, Davidlohr Bueso wrote:
> > +
> > +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
> > +{
> > +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> > +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> > +	int swappiness = -1;
> > +	char *old_buf, *start;
> > +	substring_t args[MAX_OPT_ARGS];
> > +
> > +	if (!buf || (!memcg && !pgdat))
> 
> I don't think this series is adding a use-case where both memcg and
> pgdat are non-NULL, so let's error out on that as well.

As a followup, please.  This has been in -next for four weeks and I'd
prefer not to have to route around it (again).
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Davidlohr Bueso 2 months, 3 weeks ago
On Thu, 17 Jul 2025, Andrew Morton wrote:

>On Thu, 17 Jul 2025 15:17:09 -0700 Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
>> On Mon, Jun 23, 2025 at 11:58:49AM -0700, Davidlohr Bueso wrote:
>> > +
>> > +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
>> > +{
>> > +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
>> > +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
>> > +	int swappiness = -1;
>> > +	char *old_buf, *start;
>> > +	substring_t args[MAX_OPT_ARGS];
>> > +
>> > +	if (!buf || (!memcg && !pgdat))
>>
>> I don't think this series is adding a use-case where both memcg and
>> pgdat are non-NULL, so let's error out on that as well.
>
>As a followup, please.  This has been in -next for four weeks and I'd
>prefer not to have to route around it (again).
>

From: Davidlohr Bueso <dave@stgolabs.net>
Date: Thu, 17 Jul 2025 16:53:24 -0700
Subject: [PATCH] mm-introduce-per-node-proactive-reclaim-interface-fix

Both memcg and node is also a bogus case, per Shakeel.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
  mm/vmscan.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4598d18ba256..d5f7b1703234 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7758,7 +7758,7 @@ int user_proactive_reclaim(char *buf,
  	substring_t args[MAX_OPT_ARGS];
  	gfp_t gfp_mask = GFP_KERNEL;
  
-	if (!buf || (!memcg && !pgdat))
+	if (!buf || (!memcg && !pgdat) || (memcg && pgdat))
  		return -EINVAL;
  
  	buf = strstrip(buf);
-- 
2.39.5
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Shakeel Butt 2 months, 3 weeks ago
On Thu, Jul 17, 2025 at 04:56:04PM -0700, Davidlohr Bueso wrote:
> On Thu, 17 Jul 2025, Andrew Morton wrote:
> 
> > On Thu, 17 Jul 2025 15:17:09 -0700 Shakeel Butt <shakeel.butt@linux.dev> wrote:
> > 
> > > On Mon, Jun 23, 2025 at 11:58:49AM -0700, Davidlohr Bueso wrote:
> > > > +
> > > > +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
> > > > +{
> > > > +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> > > > +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> > > > +	int swappiness = -1;
> > > > +	char *old_buf, *start;
> > > > +	substring_t args[MAX_OPT_ARGS];
> > > > +
> > > > +	if (!buf || (!memcg && !pgdat))
> > > 
> > > I don't think this series is adding a use-case where both memcg and
> > > pgdat are non-NULL, so let's error out on that as well.
> > 
> > As a followup, please.  This has been in -next for four weeks and I'd
> > prefer not to have to route around it (again).
> > 
> 
> From: Davidlohr Bueso <dave@stgolabs.net>
> Date: Thu, 17 Jul 2025 16:53:24 -0700
> Subject: [PATCH] mm-introduce-per-node-proactive-reclaim-interface-fix
> 
> Both memcg and node is also a bogus case, per Shakeel.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>

With this, I think we are good. We can always refactor and move code
around to our taste but interface and functionality wise this is fine.

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Roman Gushchin 2 months, 3 weeks ago
Davidlohr Bueso <dave@stgolabs.net> writes:

> This adds a general call for both parsing as well as the
> common reclaim semantics. memcg is still the only user and
> no change in semantics.
>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  mm/internal.h   |  2 +
>  mm/memcontrol.c | 77 ++------------------------------------
>  mm/vmscan.c     | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 104 insertions(+), 73 deletions(-)
> ...
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index c13c01eb0b42..63ddec550c3b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -57,6 +57,7 @@
>  #include <linux/rculist_nulls.h>
>  #include <linux/random.h>
>  #include <linux/mmu_notifier.h>
> +#include <linux/parser.h>
>  
>  #include <asm/tlbflush.h>
>  #include <asm/div64.h>
> @@ -6714,6 +6715,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>  
>  	return nr_reclaimed;
>  }
> +#else
> +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> +					   unsigned long nr_pages,
> +					   gfp_t gfp_mask,
> +					   unsigned int reclaim_options,
> +					   int *swappiness)
> +{
> +	return 0;
> +}
>  #endif
>  
>  static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
> @@ -7708,6 +7718,94 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
>  
>  	return ret;
>  }
> +
> +enum {
> +	MEMORY_RECLAIM_SWAPPINESS = 0,
> +	MEMORY_RECLAIM_SWAPPINESS_MAX,
> +	MEMORY_RECLAIM_NULL,
> +};
> +static const match_table_t tokens = {
> +	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
> +	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
> +	{ MEMORY_RECLAIM_NULL, NULL },
> +};
> +
> +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
> +{
> +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> +	int swappiness = -1;
> +	char *old_buf, *start;
> +	substring_t args[MAX_OPT_ARGS];
> +
> +	if (!buf || (!memcg && !pgdat))
> +		return -EINVAL;
> +
> +	buf = strstrip(buf);
> +
> +	old_buf = buf;
> +	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
> +	if (buf == old_buf)
> +		return -EINVAL;
> +
> +	buf = strstrip(buf);

To be honest, not a big fan of this refactoring. Effectively parts of
the memcg user interface are moved into mm/vmscan.c. I get that you want
to use the exact same interface somewhere else, but still...

Is it possible to keep it in mm/memcontrol.c?
Also maybe split the actual reclaim mechanism and user's input parsing?

Thanks
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Davidlohr Bueso 2 months, 3 weeks ago
On Wed, 16 Jul 2025, Roman Gushchin wrote:

>Davidlohr Bueso <dave@stgolabs.net> writes:
>
>> This adds a general call for both parsing as well as the
>> common reclaim semantics. memcg is still the only user and
>> no change in semantics.
>>
>> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
>> ---
>>  mm/internal.h   |  2 +
>>  mm/memcontrol.c | 77 ++------------------------------------
>>  mm/vmscan.c     | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 104 insertions(+), 73 deletions(-)
>> ...
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index c13c01eb0b42..63ddec550c3b 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -57,6 +57,7 @@
>>  #include <linux/rculist_nulls.h>
>>  #include <linux/random.h>
>>  #include <linux/mmu_notifier.h>
>> +#include <linux/parser.h>
>>
>>  #include <asm/tlbflush.h>
>>  #include <asm/div64.h>
>> @@ -6714,6 +6715,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>>
>>  	return nr_reclaimed;
>>  }
>> +#else
>> +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>> +					   unsigned long nr_pages,
>> +					   gfp_t gfp_mask,
>> +					   unsigned int reclaim_options,
>> +					   int *swappiness)
>> +{
>> +	return 0;
>> +}
>>  #endif
>>
>>  static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
>> @@ -7708,6 +7718,94 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
>>
>>  	return ret;
>>  }
>> +
>> +enum {
>> +	MEMORY_RECLAIM_SWAPPINESS = 0,
>> +	MEMORY_RECLAIM_SWAPPINESS_MAX,
>> +	MEMORY_RECLAIM_NULL,
>> +};
>> +static const match_table_t tokens = {
>> +	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
>> +	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
>> +	{ MEMORY_RECLAIM_NULL, NULL },
>> +};
>> +
>> +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
>> +{
>> +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
>> +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
>> +	int swappiness = -1;
>> +	char *old_buf, *start;
>> +	substring_t args[MAX_OPT_ARGS];
>> +
>> +	if (!buf || (!memcg && !pgdat))
>> +		return -EINVAL;
>> +
>> +	buf = strstrip(buf);
>> +
>> +	old_buf = buf;
>> +	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
>> +	if (buf == old_buf)
>> +		return -EINVAL;
>> +
>> +	buf = strstrip(buf);
>
>To be honest, not a big fan of this refactoring. Effectively parts of
>the memcg user interface are moved into mm/vmscan.c. I get that you want
>to use the exact same interface somewhere else, but still...

I disagree, further this is no different than other callers in vmscan.c
around memcg.

>Is it possible to keep it in mm/memcontrol.c?

Why? now proactive reclaim is not special to memcg... unless strong reasons
it makes little sense to keep it there.

>Also maybe split the actual reclaim mechanism and user's input parsing?

I tried something like this initially, and ended up prefering this way.

Further, this approach limits the reachability of the input parsing logic, and
the interface is already being an exception to the one-value per file "rule".

Thanks,
Davidlohr
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Klara Modin 3 months, 2 weeks ago
Hi,

On 2025-06-23 11:58:49 -0700, Davidlohr Bueso wrote:
> This adds a general call for both parsing as well as the
> common reclaim semantics. memcg is still the only user and
> no change in semantics.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  mm/internal.h   |  2 +
>  mm/memcontrol.c | 77 ++------------------------------------
>  mm/vmscan.c     | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 104 insertions(+), 73 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 3823fb356d3b..fc4262262b31 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -517,6 +517,8 @@ extern unsigned long highest_memmap_pfn;
>  bool folio_isolate_lru(struct folio *folio);
>  void folio_putback_lru(struct folio *folio);
>  extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
> +int user_proactive_reclaim(char *buf,
> +			   struct mem_cgroup *memcg, pg_data_t *pgdat);
>  
>  /*
>   * in mm/rmap.c:
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 902da8a9c643..015e406eadfa 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -51,7 +51,6 @@
>  #include <linux/spinlock.h>
>  #include <linux/fs.h>
>  #include <linux/seq_file.h>
> -#include <linux/parser.h>
>  #include <linux/vmpressure.h>
>  #include <linux/memremap.h>
>  #include <linux/mm_inline.h>
> @@ -4566,83 +4565,15 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
>  	return nbytes;
>  }
>  
> -enum {
> -	MEMORY_RECLAIM_SWAPPINESS = 0,
> -	MEMORY_RECLAIM_SWAPPINESS_MAX,
> -	MEMORY_RECLAIM_NULL,
> -};
> -
> -static const match_table_t tokens = {
> -	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
> -	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
> -	{ MEMORY_RECLAIM_NULL, NULL },
> -};
> -
>  static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
>  			      size_t nbytes, loff_t off)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
> -	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> -	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> -	int swappiness = -1;
> -	unsigned int reclaim_options;
> -	char *old_buf, *start;
> -	substring_t args[MAX_OPT_ARGS];
> -
> -	buf = strstrip(buf);
> -
> -	old_buf = buf;
> -	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
> -	if (buf == old_buf)
> -		return -EINVAL;
> -
> -	buf = strstrip(buf);
> -
> -	while ((start = strsep(&buf, " ")) != NULL) {
> -		if (!strlen(start))
> -			continue;
> -		switch (match_token(start, tokens, args)) {
> -		case MEMORY_RECLAIM_SWAPPINESS:
> -			if (match_int(&args[0], &swappiness))
> -				return -EINVAL;
> -			if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
> -				return -EINVAL;
> -			break;
> -		case MEMORY_RECLAIM_SWAPPINESS_MAX:
> -			swappiness = SWAPPINESS_ANON_ONLY;
> -			break;
> -		default:
> -			return -EINVAL;
> -		}
> -	}
> -
> -	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
> -	while (nr_reclaimed < nr_to_reclaim) {
> -		/* Will converge on zero, but reclaim enforces a minimum */
> -		unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
> -		unsigned long reclaimed;
> -
> -		if (signal_pending(current))
> -			return -EINTR;
> -
> -		/*
> -		 * This is the final attempt, drain percpu lru caches in the
> -		 * hope of introducing more evictable pages for
> -		 * try_to_free_mem_cgroup_pages().
> -		 */
> -		if (!nr_retries)
> -			lru_add_drain_all();
> -
> -		reclaimed = try_to_free_mem_cgroup_pages(memcg,
> -					batch_size, GFP_KERNEL,
> -					reclaim_options,
> -					swappiness == -1 ? NULL : &swappiness);
> -
> -		if (!reclaimed && !nr_retries--)
> -			return -EAGAIN;
> +	int ret;
>  
> -		nr_reclaimed += reclaimed;
> -	}

> +	ret = user_proactive_reclaim(buf, memcg, NULL);

This is outside CONFIG_NUMA.

> +	if (ret)
> +		return ret;
>  
>  	return nbytes;
>  }
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index c13c01eb0b42..63ddec550c3b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -57,6 +57,7 @@
>  #include <linux/rculist_nulls.h>
>  #include <linux/random.h>
>  #include <linux/mmu_notifier.h>
> +#include <linux/parser.h>
>  
>  #include <asm/tlbflush.h>
>  #include <asm/div64.h>
> @@ -6714,6 +6715,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>  
>  	return nr_reclaimed;
>  }
> +#else
> +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> +					   unsigned long nr_pages,
> +					   gfp_t gfp_mask,
> +					   unsigned int reclaim_options,
> +					   int *swappiness)
> +{
> +	return 0;
> +}
>  #endif
>  
>  static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
> @@ -7708,6 +7718,94 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
>  
>  	return ret;
>  }
> +
> +enum {
> +	MEMORY_RECLAIM_SWAPPINESS = 0,
> +	MEMORY_RECLAIM_SWAPPINESS_MAX,
> +	MEMORY_RECLAIM_NULL,
> +};
> +static const match_table_t tokens = {
> +	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
> +	{ MEMORY_RECLAIM_SWAPPINESS_MAX, "swappiness=max"},
> +	{ MEMORY_RECLAIM_NULL, NULL },
> +};
> +
> +int user_proactive_reclaim(char *buf, struct mem_cgroup *memcg, pg_data_t *pgdat)
> +{
> +	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> +	unsigned long nr_to_reclaim, nr_reclaimed = 0;
> +	int swappiness = -1;
> +	char *old_buf, *start;
> +	substring_t args[MAX_OPT_ARGS];
> +
> +	if (!buf || (!memcg && !pgdat))
> +		return -EINVAL;
> +
> +	buf = strstrip(buf);
> +
> +	old_buf = buf;
> +	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
> +	if (buf == old_buf)
> +		return -EINVAL;
> +
> +	buf = strstrip(buf);
> +
> +	while ((start = strsep(&buf, " ")) != NULL) {
> +		if (!strlen(start))
> +			continue;
> +		switch (match_token(start, tokens, args)) {
> +		case MEMORY_RECLAIM_SWAPPINESS:
> +			if (match_int(&args[0], &swappiness))
> +				return -EINVAL;
> +			if (swappiness < MIN_SWAPPINESS ||
> +			    swappiness > MAX_SWAPPINESS)
> +				return -EINVAL;
> +			break;
> +		case MEMORY_RECLAIM_SWAPPINESS_MAX:
> +			swappiness = SWAPPINESS_ANON_ONLY;
> +			break;
> +		default:
> +			return -EINVAL;
> +		}
> +	}
> +
> +	while (nr_reclaimed < nr_to_reclaim) {
> +		/* Will converge on zero, but reclaim enforces a minimum */
> +		unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
> +		unsigned long reclaimed;
> +
> +		if (signal_pending(current))
> +			return -EINTR;
> +
> +		/*
> +		 * This is the final attempt, drain percpu lru caches in the
> +		 * hope of introducing more evictable pages.
> +		 */
> +		if (!nr_retries)
> +			lru_add_drain_all();
> +
> +		if (memcg) {
> +			unsigned int reclaim_options;
> +
> +			reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
> +					  MEMCG_RECLAIM_PROACTIVE;
> +			reclaimed = try_to_free_mem_cgroup_pages(memcg,
> +						 batch_size, GFP_KERNEL,
> +						 reclaim_options,
> +						 swappiness == -1 ? NULL : &swappiness);
> +		} else {
> +			return -EINVAL;
> +		}
> +
> +		if (!reclaimed && !nr_retries--)
> +			return -EAGAIN;
> +
> +		nr_reclaimed += reclaimed;
> +	}
> +
> +	return 0;
> +}
> +
>  #endif

Should this really be inside CONFIG_NUMA? It was moved from outside of
CONFIG_NUMA where it's now called which results in a build failure if
it's disabled. Or is there a stub missing?

>  
>  /**
> -- 
> 2.39.5
> 

Regards,
Klara Modin
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Andrew Morton 3 months, 2 weeks ago
On Mon, 23 Jun 2025 11:58:49 -0700 Davidlohr Bueso <dave@stgolabs.net> wrote:

> This adds a general call for both parsing as well as the
> common reclaim semantics. memcg is still the only user and
> no change in semantics.
> 
> +int user_proactive_reclaim(char *buf,
> +			   struct mem_cgroup *memcg, pg_data_t *pgdat);

Feeling nitty, is this a good name for it?  It's hard to imagine what a
function called "user_proactive_reclaim" actually does.

That it isn't documented isn't helpful either!
Re: [PATCH 2/4] mm/memcg: make memory.reclaim interface generic
Posted by Davidlohr Bueso 3 months, 2 weeks ago
On Mon, 23 Jun 2025, Andrew Morton wrote:

>On Mon, 23 Jun 2025 11:58:49 -0700 Davidlohr Bueso <dave@stgolabs.net> wrote:
>
>> This adds a general call for both parsing as well as the
>> common reclaim semantics. memcg is still the only user and
>> no change in semantics.
>>
>> +int user_proactive_reclaim(char *buf,
>> +			   struct mem_cgroup *memcg, pg_data_t *pgdat);
>
>Feeling nitty, is this a good name for it?  It's hard to imagine what a
>function called "user_proactive_reclaim" actually does.

I'm open to another name, sure. But imo the chosen one is actually pretty
descriptive: you know it's coming from userspace (justifying the 'buf'),
you know this is not about memory pressure and the memcg/pgdat parameters
tell the possible interfaces. Would prefixing a 'do_' be any better?

>That it isn't documented isn't helpful either!

I had done this but felt rather redundant and unnecessary, and further
don't expect for it go gain any other users. But ok, will add.

Thanks,
Davidlohr