Memory protection (min/low) requires constant tracking of
protected memory usage. propagate_protected_usage() is called
on every page counter update and does a number of operations
even in cases when the actual memory protection functionality
is not supported (e.g. hugetlb cgroups or memcg swap counters).
It's obviously inefficient and leads to a waste of CPU cycles.
It can be addressed by calling propagate_protected_usage() only
for the counters which do support memory guarantees. As of now
it's only memcg->memory - the unified memory memcg counter.
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
include/linux/page_counter.h | 8 +++++++-
mm/hugetlb_cgroup.c | 4 ++--
mm/memcontrol.c | 16 ++++++++--------
mm/page_counter.c | 16 +++++++++++++---
4 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 860f313182e7..b31fd5b208aa 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -32,6 +32,7 @@ struct page_counter {
/* Keep all the read most fields in a separete cacheline. */
CACHELINE_PADDING(_pad2_);
+ bool protection_support;
unsigned long min;
unsigned long low;
unsigned long high;
@@ -45,12 +46,17 @@ struct page_counter {
#define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
#endif
+/*
+ * Protected usage is tracked only when protection_support is true.
+ */
static inline void page_counter_init(struct page_counter *counter,
- struct page_counter *parent)
+ struct page_counter *parent,
+ bool protection_support)
{
atomic_long_set(&counter->usage, 0);
counter->max = PAGE_COUNTER_MAX;
counter->parent = parent;
+ counter->protection_support = protection_support;
}
static inline unsigned long page_counter_read(struct page_counter *counter)
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f443a56409a9..d8d0e665caed 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -114,10 +114,10 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
}
page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
idx),
- fault_parent);
+ fault_parent, false);
page_counter_init(
hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
- rsvd_parent);
+ rsvd_parent, false);
limit = round_down(PAGE_COUNTER_MAX,
pages_per_huge_page(&hstates[idx]));
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 87fa448b731f..45c0f816a974 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3579,21 +3579,21 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent) {
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
- page_counter_init(&memcg->memory, &parent->memory);
- page_counter_init(&memcg->swap, &parent->swap);
+ page_counter_init(&memcg->memory, &parent->memory, true);
+ page_counter_init(&memcg->swap, &parent->swap, false);
#ifdef CONFIG_MEMCG_V1
WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
- page_counter_init(&memcg->kmem, &parent->kmem);
- page_counter_init(&memcg->tcpmem, &parent->tcpmem);
+ page_counter_init(&memcg->kmem, &parent->kmem, false);
+ page_counter_init(&memcg->tcpmem, &parent->tcpmem, false);
#endif
} else {
init_memcg_stats();
init_memcg_events();
- page_counter_init(&memcg->memory, NULL);
- page_counter_init(&memcg->swap, NULL);
+ page_counter_init(&memcg->memory, NULL, true);
+ page_counter_init(&memcg->swap, NULL, false);
#ifdef CONFIG_MEMCG_V1
- page_counter_init(&memcg->kmem, NULL);
- page_counter_init(&memcg->tcpmem, NULL);
+ page_counter_init(&memcg->kmem, NULL, false);
+ page_counter_init(&memcg->tcpmem, NULL, false);
#endif
root_mem_cgroup = memcg;
return &memcg->css;
diff --git a/mm/page_counter.c b/mm/page_counter.c
index ad9bdde5d5d2..a54382a58ace 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -13,6 +13,11 @@
#include <linux/bug.h>
#include <asm/page.h>
+static bool track_protection(struct page_counter *c)
+{
+ return c->protection_support;
+}
+
static void propagate_protected_usage(struct page_counter *c,
unsigned long usage)
{
@@ -57,7 +62,8 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
new = 0;
atomic_long_set(&counter->usage, new);
}
- propagate_protected_usage(counter, new);
+ if (track_protection(counter))
+ propagate_protected_usage(counter, new);
}
/**
@@ -70,12 +76,14 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
struct page_counter *c;
+ bool protection = track_protection(counter);
for (c = counter; c; c = c->parent) {
long new;
new = atomic_long_add_return(nr_pages, &c->usage);
- propagate_protected_usage(c, new);
+ if (protection)
+ propagate_protected_usage(c, new);
/*
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
@@ -112,6 +120,7 @@ bool page_counter_try_charge(struct page_counter *counter,
struct page_counter **fail)
{
struct page_counter *c;
+ bool protection = track_protection(counter);
for (c = counter; c; c = c->parent) {
long new;
@@ -141,7 +150,8 @@ bool page_counter_try_charge(struct page_counter *counter,
*fail = c;
goto failed;
}
- propagate_protected_usage(c, new);
+ if (protection)
+ propagate_protected_usage(c, new);
/* see comment on page_counter_charge */
if (new > READ_ONCE(c->local_watermark)) {
--
2.46.0.rc1.232.g9752f9e123-goog
On Wed, Jul 24, 2024 at 1:21 PM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> Memory protection (min/low) requires a constant tracking of
> protected memory usage. propagate_protected_usage() is called
> on each page counters update and does a number of operations
> even in cases when the actual memory protection functionality
> is not supported (e.g. hugetlb cgroups or memcg swap counters).
>
> It's obviously inefficient and leads to a waste of CPU cycles.
> It can be addressed by calling propagate_protected_usage() only
> for the counters which do support memory guarantees. As of now
> it's only memcg->memory - the unified memory memcg counter.
>
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> ---
> include/linux/page_counter.h | 8 +++++++-
> mm/hugetlb_cgroup.c | 4 ++--
> mm/memcontrol.c | 16 ++++++++--------
> mm/page_counter.c | 16 +++++++++++++---
> 4 files changed, 30 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
> index 860f313182e7..b31fd5b208aa 100644
> --- a/include/linux/page_counter.h
> +++ b/include/linux/page_counter.h
> @@ -32,6 +32,7 @@ struct page_counter {
> /* Keep all the read most fields in a separete cacheline. */
> CACHELINE_PADDING(_pad2_);
>
> + bool protection_support;
> unsigned long min;
> unsigned long low;
> unsigned long high;
> @@ -45,12 +46,17 @@ struct page_counter {
> #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
> #endif
>
> +/*
> + * Protection is supported only for the first counter (with id 0).
> + */
> static inline void page_counter_init(struct page_counter *counter,
> - struct page_counter *parent)
> + struct page_counter *parent,
> + bool protection_support)
Would it be better to make this an internal helper (e.g.
__page_counter_init()), and add another API function that passes in
protection_support=true, for example:
static inline void page_counter_init_protected(..)
{
__page_counter_init(.., true);
}
This will get rid of the naked booleans at the callsites of
page_counter_init(), which are more difficult to interpret. It will
also reduce the diff as we only need to change the page_counter_init()
calls of memcg->memory.
WDYT?
> {
> atomic_long_set(&counter->usage, 0);
> counter->max = PAGE_COUNTER_MAX;
> counter->parent = parent;
> + counter->protection_support = protection_support;
> }
[..]
On Wed, Jul 24, 2024 at 04:13:17PM -0700, Yosry Ahmed wrote:
> On Wed, Jul 24, 2024 at 1:21 PM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> >
> > Memory protection (min/low) requires a constant tracking of
> > protected memory usage. propagate_protected_usage() is called
> > on each page counters update and does a number of operations
> > even in cases when the actual memory protection functionality
> > is not supported (e.g. hugetlb cgroups or memcg swap counters).
> >
> > It's obviously inefficient and leads to a waste of CPU cycles.
> > It can be addressed by calling propagate_protected_usage() only
> > for the counters which do support memory guarantees. As of now
> > it's only memcg->memory - the unified memory memcg counter.
> >
> > Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> > ---
> > include/linux/page_counter.h | 8 +++++++-
> > mm/hugetlb_cgroup.c | 4 ++--
> > mm/memcontrol.c | 16 ++++++++--------
> > mm/page_counter.c | 16 +++++++++++++---
> > 4 files changed, 30 insertions(+), 14 deletions(-)
> >
> > diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
> > index 860f313182e7..b31fd5b208aa 100644
> > --- a/include/linux/page_counter.h
> > +++ b/include/linux/page_counter.h
> > @@ -32,6 +32,7 @@ struct page_counter {
> > /* Keep all the read most fields in a separete cacheline. */
> > CACHELINE_PADDING(_pad2_);
> >
> > + bool protection_support;
> > unsigned long min;
> > unsigned long low;
> > unsigned long high;
> > @@ -45,12 +46,17 @@ struct page_counter {
> > #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
> > #endif
> >
> > +/*
> > + * Protection is supported only for the first counter (with id 0).
> > + */
> > static inline void page_counter_init(struct page_counter *counter,
> > - struct page_counter *parent)
> > + struct page_counter *parent,
> > + bool protection_support)
>
> Would it be better to make this an internal helper (e.g.
> __page_counter_init()), and add another API function that passes in
> protection_support=true, for example:
>
> static inline void page_counter_init_protected(..)
> {
> __page_counter_init(.., true);
> }
>
> This will get rid of the naked booleans at the callsites of
> page_counter_init(), which are more difficult to interpret. It will
> also reduce the diff as we only need to change the page_counter_init()
> calls of memcg->memory.
>
> WDYT?
No strong opinion here. There are basically 2 call sites and I don't expect
this number to grow, so not sure if it makes sense to add 2 new helpers.
Another option I thought about is to leave page_counter_init() as it is
and add a separate function to enable the protection tracking.
Thanks!
On Wed, Jul 24, 2024 at 4:32 PM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> On Wed, Jul 24, 2024 at 04:13:17PM -0700, Yosry Ahmed wrote:
> > On Wed, Jul 24, 2024 at 1:21 PM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> > >
> > > Memory protection (min/low) requires a constant tracking of
> > > protected memory usage. propagate_protected_usage() is called
> > > on each page counters update and does a number of operations
> > > even in cases when the actual memory protection functionality
> > > is not supported (e.g. hugetlb cgroups or memcg swap counters).
> > >
> > > It's obviously inefficient and leads to a waste of CPU cycles.
> > > It can be addressed by calling propagate_protected_usage() only
> > > for the counters which do support memory guarantees. As of now
> > > it's only memcg->memory - the unified memory memcg counter.
> > >
> > > Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> > > ---
> > > include/linux/page_counter.h | 8 +++++++-
> > > mm/hugetlb_cgroup.c | 4 ++--
> > > mm/memcontrol.c | 16 ++++++++--------
> > > mm/page_counter.c | 16 +++++++++++++---
> > > 4 files changed, 30 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
> > > index 860f313182e7..b31fd5b208aa 100644
> > > --- a/include/linux/page_counter.h
> > > +++ b/include/linux/page_counter.h
> > > @@ -32,6 +32,7 @@ struct page_counter {
> > > /* Keep all the read most fields in a separete cacheline. */
> > > CACHELINE_PADDING(_pad2_);
> > >
> > > + bool protection_support;
> > > unsigned long min;
> > > unsigned long low;
> > > unsigned long high;
> > > @@ -45,12 +46,17 @@ struct page_counter {
> > > #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
> > > #endif
> > >
> > > +/*
> > > + * Protection is supported only for the first counter (with id 0).
> > > + */
> > > static inline void page_counter_init(struct page_counter *counter,
> > > - struct page_counter *parent)
> > > + struct page_counter *parent,
> > > + bool protection_support)
> >
> > Would it be better to make this an internal helper (e.g.
> > __page_counter_init()), and add another API function that passes in
> > protection_support=true, for example:
> >
> > static inline void page_counter_init_protected(..)
> > {
> > __page_counter_init(.., true);
> > }
> >
> > This will get rid of the naked booleans at the callsites of
> > page_counter_init(), which are more difficult to interpret. It will
> > also reduce the diff as we only need to change the page_counter_init()
> > calls of memcg->memory.
> >
> > WDYT?
>
> No strong opinion here. There are basically 2 call sites and I don't expect
> this number to grow, so not sure if it makes sense to add 2 new helpers.
>
> Another option I thought about is to leave page_counter_init() as it is
> and add a separate function to enable the protection tracking.
Yeah this should work too, in fact it should give us a chance to
choose a more descriptive name than page_counter_init_protected()
(e.g. page_counter_enable_protection()).
My main concern is the naked booleans, and secondarily the unnecessary
diff to other init sites. I also don't feel strongly about it though.
>
> Thanks!
On Wed, Jul 24, 2024 at 08:20:59PM GMT, Roman Gushchin wrote:
> Memory protection (min/low) requires a constant tracking of
> protected memory usage. propagate_protected_usage() is called
> on each page counters update and does a number of operations
> even in cases when the actual memory protection functionality
> is not supported (e.g. hugetlb cgroups or memcg swap counters).
>
> It's obviously inefficient and leads to a waste of CPU cycles.
> It can be addressed by calling propagate_protected_usage() only
> for the counters which do support memory guarantees. As of now
> it's only memcg->memory - the unified memory memcg counter.
>
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
© 2016 - 2025 Red Hat, Inc.