[PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats

Shakeel Butt posted 7 patches 1 year, 7 months ago
There is a newer version of this series
[PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Shakeel Butt 1 year, 7 months ago
To reduce the memory used by memcg events and stats, the kernel uses an
indirection table and only allocates the stats and events that are
actually used by the memcg code. To make this more robust, let's add
warnings where unexpected stat and event indexes are used.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 103e0e53e20a..36145089dcf5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	i = memcg_stats_index(idx);
-	if (i >= 0) {
+	if (likely(i >= 0)) {
 		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 		x = READ_ONCE(pn->lruvec_stats->state[i]);
+	} else {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 	}
 #ifdef CONFIG_SMP
 	if (x < 0)
@@ -693,9 +695,11 @@ unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	i = memcg_stats_index(idx);
-	if (i >= 0) {
+	if (likely(i >= 0)) {
 		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 		x = READ_ONCE(pn->lruvec_stats->state_local[i]);
+	} else {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 	}
 #ifdef CONFIG_SMP
 	if (x < 0)
@@ -922,8 +926,10 @@ unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 	long x;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return 0;
+	}
 
 	x = READ_ONCE(memcg->vmstats->state[i]);
 #ifdef CONFIG_SMP
@@ -959,8 +965,13 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 {
 	int i = memcg_stats_index(idx);
 
-	if (mem_cgroup_disabled() || i < 0)
+	if (mem_cgroup_disabled())
+		return;
+
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return;
+	}
 
 	__this_cpu_add(memcg->vmstats_percpu->state[i], val);
 	memcg_rstat_updated(memcg, memcg_state_val_in_pages(idx, val));
@@ -972,8 +983,10 @@ static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 	long x;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return 0;
+	}
 
 	x = READ_ONCE(memcg->vmstats->state_local[i]);
 #ifdef CONFIG_SMP
@@ -991,8 +1004,10 @@ static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 	struct mem_cgroup *memcg;
 	int i = memcg_stats_index(idx);
 
-	if (i < 0)
+	if (unlikely(i < 0)) {
+		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
 		return;
+	}
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
@@ -1107,8 +1122,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 {
 	int index = memcg_events_index(idx);
 
-	if (mem_cgroup_disabled() || index < 0)
+	if (mem_cgroup_disabled())
+		return;
+
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, idx);
 		return;
+	}
 
 	memcg_stats_lock();
 	__this_cpu_add(memcg->vmstats_percpu->events[index], count);
@@ -1120,8 +1140,11 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
 	int index = memcg_events_index(event);
 
-	if (index < 0)
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, event);
 		return 0;
+	}
+
 	return READ_ONCE(memcg->vmstats->events[index]);
 }
 
@@ -1129,8 +1152,10 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
 	int index = memcg_events_index(event);
 
-	if (index < 0)
+	if (unlikely(index < 0)) {
+		pr_warn_once("%s: event item index: %d\n", __func__, event);
 		return 0;
+	}
 
 	return READ_ONCE(memcg->vmstats->events_local[index]);
 }
-- 
2.43.0
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Roman Gushchin 1 year, 7 months ago
On Fri, Apr 26, 2024 at 05:37:31PM -0700, Shakeel Butt wrote:
> To reduce memory usage by the memcg events and stats, the kernel uses
> indirection table and only allocate stats and events which are being
> used by the memcg code. To make this more robust, let's add warnings
> where unexpected stats and events indexes are used.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 103e0e53e20a..36145089dcf5 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
>  		return node_page_state(lruvec_pgdat(lruvec), idx);
>  
>  	i = memcg_stats_index(idx);
> -	if (i >= 0) {
> +	if (likely(i >= 0)) {
>  		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
>  		x = READ_ONCE(pn->lruvec_stats->state[i]);
> +	} else {
> +		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
>  	}

I think this is generally CONFIG_DEBUG_VM material. Do we have any extra
concerns here?

Having pr_warn_on_once() would be nice here.
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Shakeel Butt 1 year, 7 months ago
On Mon, Apr 29, 2024 at 09:06:23AM -0700, Roman Gushchin wrote:
> On Fri, Apr 26, 2024 at 05:37:31PM -0700, Shakeel Butt wrote:
> > To reduce memory usage by the memcg events and stats, the kernel uses
> > indirection table and only allocate stats and events which are being
> > used by the memcg code. To make this more robust, let's add warnings
> > where unexpected stats and events indexes are used.
> > 
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 34 insertions(+), 9 deletions(-)
> > 
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 103e0e53e20a..36145089dcf5 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> >  		return node_page_state(lruvec_pgdat(lruvec), idx);
> >  
> >  	i = memcg_stats_index(idx);
> > -	if (i >= 0) {
> > +	if (likely(i >= 0)) {
> >  		pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> >  		x = READ_ONCE(pn->lruvec_stats->state[i]);
> > +	} else {
> > +		pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> >  	}
> 
> I think it's generally a CONFIG_DEBUG_VM material. Do we have some extra
> concerns here?
> 
> Having pr_warn_on_once() would be nice here.

No extra concerns; I just want this indirection table to stay up to date
in the future.
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Yosry Ahmed 1 year, 7 months ago
On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
> To reduce memory usage by the memcg events and stats, the kernel uses
> indirection table and only allocate stats and events which are being
> used by the memcg code. To make this more robust, let's add warnings
> where unexpected stats and events indexes are used.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 34 insertions(+), 9 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 103e0e53e20a..36145089dcf5 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
>                 return node_page_state(lruvec_pgdat(lruvec), idx);
>
>         i = memcg_stats_index(idx);
> -       if (i >= 0) {
> +       if (likely(i >= 0)) {
>                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
>                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> +       } else {
> +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
>         }

Can we make these more compact by using WARN_ON_ONCE() instead:

if (WARN_ON_ONCE(i < 0))
         return 0;

I guess the advantage of using pr_warn_once() is that we get to print
the exact stat index, but the stack trace from WARN_ON_ONCE() should
make it obvious in most cases AFAICT.

No strong opinions either way.
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Shakeel Butt 1 year, 7 months ago
On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> >
> > To reduce memory usage by the memcg events and stats, the kernel uses
> > indirection table and only allocate stats and events which are being
> > used by the memcg code. To make this more robust, let's add warnings
> > where unexpected stats and events indexes are used.
> >
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 34 insertions(+), 9 deletions(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 103e0e53e20a..36145089dcf5 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> >                 return node_page_state(lruvec_pgdat(lruvec), idx);
> >
> >         i = memcg_stats_index(idx);
> > -       if (i >= 0) {
> > +       if (likely(i >= 0)) {
> >                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> >                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> > +       } else {
> > +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> >         }
> 
> Can we make these more compact by using WARN_ON_ONCE() instead:
> 
> if (WARN_ON_ONCE(i < 0))
>          return 0;
> 
> I guess the advantage of using pr_warn_once() is that we get to print
> the exact stat index, but the stack trace from WARN_ON_ONCE() should
> make it obvious in most cases AFAICT.
> 
> No strong opinions either way.

One reason I used pr_warn_once() over WARN_ON_ONCE() is that a WARN splat
triggers syzbot. No need to trip the bot over this error condition.
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Johannes Weiner 1 year, 7 months ago
On Fri, Apr 26, 2024 at 06:18:13PM -0700, Shakeel Butt wrote:
> On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> > On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> > >
> > > To reduce memory usage by the memcg events and stats, the kernel uses
> > > indirection table and only allocate stats and events which are being
> > > used by the memcg code. To make this more robust, let's add warnings
> > > where unexpected stats and events indexes are used.
> > >
> > > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > ---
> > >  mm/memcontrol.c | 43 ++++++++++++++++++++++++++++++++++---------
> > >  1 file changed, 34 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 103e0e53e20a..36145089dcf5 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -671,9 +671,11 @@ unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> > >                 return node_page_state(lruvec_pgdat(lruvec), idx);
> > >
> > >         i = memcg_stats_index(idx);
> > > -       if (i >= 0) {
> > > +       if (likely(i >= 0)) {
> > >                 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> > >                 x = READ_ONCE(pn->lruvec_stats->state[i]);
> > > +       } else {
> > > +               pr_warn_once("%s: stat item index: %d\n", __func__, idx);
> > >         }
> > 
> > Can we make these more compact by using WARN_ON_ONCE() instead:
> > 
> > if (WARN_ON_ONCE(i < 0))
> >          return 0;
> > 
> > I guess the advantage of using pr_warn_once() is that we get to print
> > the exact stat index, but the stack trace from WARN_ON_ONCE() should
> > make it obvious in most cases AFAICT.

if (WARN_ONCE(i < 0, "stat item %d not in memcg_node_stat_items\n", i))
	return 0;

should work?

> > No strong opinions either way.
> 
> One reason I used pr_warn_once() over WARN_ON_ONCE() is the syzbot
> trigger. No need to trip the bot over this error condition.

The warn splat is definitely quite verbose. But I think that would
only be annoying initially, in case a site was missed. Down the line,
it seems helpful to have this stand out to somebody who is trying to
add a new cgroup stat and forgets to update the right enums.
Re: [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats
Posted by Shakeel Butt 1 year, 7 months ago
On Sat, Apr 27, 2024 at 10:22:34AM -0400, Johannes Weiner wrote:
> On Fri, Apr 26, 2024 at 06:18:13PM -0700, Shakeel Butt wrote:
> > On Fri, Apr 26, 2024 at 05:58:16PM -0700, Yosry Ahmed wrote:
> > > On Fri, Apr 26, 2024 at 5:38 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
[...]
> > > 
> > > Can we make these more compact by using WARN_ON_ONCE() instead:
> > > 
> > > if (WARN_ON_ONCE(i < 0))
> > >          return 0;
> > > 
> > > I guess the advantage of using pr_warn_once() is that we get to print
> > > the exact stat index, but the stack trace from WARN_ON_ONCE() should
> > > make it obvious in most cases AFAICT.
> 
> if (WARN_ONCE(i < 0, "stat item %d not in memcg_node_stat_items\n", i))
> 	return 0;
> 
> should work?
> 
> > > No strong opinions either way.
> > 
> > One reason I used pr_warn_once() over WARN_ON_ONCE() is the syzbot
> > trigger. No need to trip the bot over this error condition.
> 
> The warn splat is definitely quite verbose. But I think that would
> only be annoying initially, in case a site was missed. Down the line,
> it seems helpful to have this stand out to somebody who is trying to
> add a new cgroup stat and forgets to update the right enums.

Sounds good to me. I will change it to WARN_ONCE().