Let's switch all the places that acquire and release the memcg_stock lock
to no longer disable and enable irqs. There are still two functions (i.e.
mod_objcg_state() and drain_obj_stock()) which need to disable irqs to
update the stats on non-RT kernels. For now, add a simple wrapper for
that functionality.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol.c | 83 ++++++++++++++++++++++++++++++++-----------------
1 file changed, 54 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ba5d004049d3..fa28efa298f4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1796,22 +1796,17 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
*
* returns true if successful, false otherwise.
*/
-static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
- gfp_t gfp_mask)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
unsigned int stock_pages;
- unsigned long flags;
bool ret = false;
if (nr_pages > MEMCG_CHARGE_BATCH)
return ret;
- if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
- if (!gfpflags_allow_spinning(gfp_mask))
+ if (!localtry_trylock(&memcg_stock.stock_lock))
return ret;
- localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
- }
stock = this_cpu_ptr(&memcg_stock);
stock_pages = READ_ONCE(stock->nr_pages);
@@ -1820,7 +1815,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
ret = true;
}
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
return ret;
}
@@ -1855,7 +1850,6 @@ static void drain_stock(struct memcg_stock_pcp *stock)
static void drain_local_stock(struct work_struct *dummy)
{
struct memcg_stock_pcp *stock;
- unsigned long flags;
lockdep_assert_once(in_task());
@@ -1864,14 +1858,14 @@ static void drain_local_stock(struct work_struct *dummy)
* drain_stock races is that we always operate on local CPU stock
* here with IRQ disabled
*/
- localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
+ localtry_lock(&memcg_stock.stock_lock);
stock = this_cpu_ptr(&memcg_stock);
drain_obj_stock(stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
}
/* Should never be called with root_mem_cgroup. */
@@ -1879,9 +1873,8 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
unsigned int stock_pages;
- unsigned long flags;
- if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+ if (!localtry_trylock(&memcg_stock.stock_lock)) {
/*
* In case of unlikely failure to lock percpu stock_lock
* uncharge memcg directly.
@@ -1902,7 +1895,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
if (stock_pages > MEMCG_CHARGE_BATCH)
drain_stock(stock);
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
}
/*
@@ -1953,17 +1946,12 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
static int memcg_hotplug_cpu_dead(unsigned int cpu)
{
struct memcg_stock_pcp *stock;
- unsigned long flags;
lockdep_assert_once(in_task());
stock = &per_cpu(memcg_stock, cpu);
- /* drain_obj_stock requires stock_lock */
- localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
drain_obj_stock(stock);
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
-
drain_stock(stock);
return 0;
@@ -2254,7 +2242,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned long pflags;
retry:
- if (consume_stock(memcg, nr_pages, gfp_mask))
+ if (consume_stock(memcg, nr_pages))
return 0;
if (!gfpflags_allow_spinning(gfp_mask))
@@ -2757,6 +2745,28 @@ static void replace_stock_objcg(struct memcg_stock_pcp *stock,
WRITE_ONCE(stock->cached_objcg, objcg);
}
+static unsigned long rt_lock(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+ migrate_disable();
+ return 0;
+#else
+ unsigned long flags = 0;
+
+ local_irq_save(flags);
+ return flags;
+#endif
+}
+
+static void rt_unlock(unsigned long flags)
+{
+#ifdef CONFIG_PREEMPT_RT
+ migrate_enable();
+#else
+ local_irq_restore(flags);
+#endif
+}
+
static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
@@ -2764,7 +2774,8 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
unsigned long flags;
int *bytes;
- if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+ if (!localtry_trylock(&memcg_stock.stock_lock)) {
+ /* Do we need rt_[un]lock() here too? */
__mod_objcg_mlstate(objcg, pgdat, idx, nr);
return;
}
@@ -2783,6 +2794,8 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
/* Flush the existing cached vmstat data */
struct pglist_data *oldpg = stock->cached_pgdat;
+ flags = rt_lock();
+
if (stock->nr_slab_reclaimable_b) {
__mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B,
stock->nr_slab_reclaimable_b);
@@ -2793,6 +2806,8 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
stock->nr_slab_unreclaimable_b);
stock->nr_slab_unreclaimable_b = 0;
}
+
+ rt_unlock(flags);
stock->cached_pgdat = pgdat;
}
@@ -2814,19 +2829,21 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
nr = 0;
}
}
- if (nr)
+ if (nr) {
+ flags = rt_lock();
__mod_objcg_mlstate(objcg, pgdat, idx, nr);
+ rt_unlock(flags);
+ }
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
{
struct memcg_stock_pcp *stock;
- unsigned long flags;
bool ret = false;
- if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags))
+ if (!localtry_trylock(&memcg_stock.stock_lock))
return ret;
stock = this_cpu_ptr(&memcg_stock);
@@ -2835,7 +2852,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
ret = true;
}
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
return ret;
}
@@ -2843,10 +2860,16 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
static void drain_obj_stock(struct memcg_stock_pcp *stock)
{
struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
+ unsigned long flags;
+ bool locked = stock->nr_bytes || stock->nr_slab_reclaimable_b ||
+ stock->nr_slab_unreclaimable_b;
if (!old)
return;
+ if (locked)
+ flags = rt_lock();
+
if (stock->nr_bytes) {
unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
@@ -2897,6 +2920,9 @@ static void drain_obj_stock(struct memcg_stock_pcp *stock)
stock->cached_pgdat = NULL;
}
+ if (locked)
+ rt_unlock(flags);
+
WRITE_ONCE(stock->cached_objcg, NULL);
obj_cgroup_put(old);
}
@@ -2920,10 +2946,9 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
bool allow_uncharge)
{
struct memcg_stock_pcp *stock;
- unsigned long flags;
unsigned int nr_pages = 0;
- if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+ if (!localtry_trylock(&memcg_stock.stock_lock)) {
atomic_add(nr_bytes, &objcg->nr_charged_bytes);
return;
}
@@ -2940,7 +2965,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
stock->nr_bytes &= (PAGE_SIZE - 1);
}
- localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ localtry_unlock(&memcg_stock.stock_lock);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
--
2.47.1
On 3/14/25 07:15, Shakeel Butt wrote:
> Let's switch all the places that acquire and release the memcg_stock lock
> to no longer disable and enable irqs. There are still two functions (i.e.
> mod_objcg_state() and drain_obj_stock()) which need to disable irqs to
> update the stats on non-RT kernels. For now, add a simple wrapper for
> that functionality.
BTW, which part of __mod_objcg_mlstate() really needs disabled irqs and not
just preemption? I see it does rcu_read_lock() anyway, which disables
preemption. Then in __mod_memcg_lruvec_state() we do some __this_cpu_add()
updates. I think these also are fine with just disabled preemption as they
are atomic vs irqs (but don't need LOCK prefix to be atomic vs other cpus
updates).
Is it just memcg_rstat_updated() which does READ_ONCE/WRITE_ONCE? Could we
perhaps just change it to operations where disabled preemption is enough?
On 2025-03-14 11:54:34 [+0100], Vlastimil Babka wrote:
> On 3/14/25 07:15, Shakeel Butt wrote:
> > Let's switch all the places that acquire and release the memcg_stock lock
> > to no longer disable and enable irqs. There are still two functions (i.e.
> > mod_objcg_state() and drain_obj_stock()) which need to disable irqs to
> > update the stats on non-RT kernels. For now, add a simple wrapper for
> > that functionality.
>
> BTW, which part of __mod_objcg_mlstate() really needs disabled irqs and not
> just preemption? I see it does rcu_read_lock() anyway, which disables
> preemption. Then in __mod_memcg_lruvec_state() we do some __this_cpu_add()
> updates. I think these also are fine with just disabled preemption as they
> are atomic vs irqs (but don't need LOCK prefix to be atomic vs other cpus
> updates).
__this_cpu_add() is not safe if it is also used in interrupt context. Some
architectures (not x86) implement it as read, add, write.
this_cpu_add() does the same but disables interrupts during the
operation.
So __this_cpu_add() should not be used if interrupts are not disabled
and a modification can happen from interrupt context.
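For reference, the generic fallbacks boil down to roughly the following (a
simplified sketch of include/asm-generic/percpu.h, not the literal source;
the *_sketch names are made up, and x86/arm64 override these with their own
implementations):

/* simplified sketch, not the real kernel macros */
#define __this_cpu_add_sketch(pcp, val)					\
do {									\
	*raw_cpu_ptr(&(pcp)) += (val);	/* plain load, add, store */	\
} while (0)

#define this_cpu_add_sketch(pcp, val)					\
do {									\
	unsigned long __flags;						\
	raw_local_irq_save(__flags);	/* keep irqs away from the RMW */ \
	*raw_cpu_ptr(&(pcp)) += (val);					\
	raw_local_irq_restore(__flags);					\
} while (0)

An interrupt that updates the same counter between the load and the store of
the plain version makes one of the two additions disappear, which is why the
plain form is only safe with irqs already disabled (or on x86, where the
whole thing is a single add instruction).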
> Is it just memcg_rstat_updated() which does READ_ONCE/WRITE_ONCE? Could we
> perhaps just change it to operations where disabled preemption is enough?
>
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
…
> > @@ -2757,6 +2745,28 @@ static void replace_stock_objcg(struct memcg_stock_pcp *stock,
> > WRITE_ONCE(stock->cached_objcg, objcg);
> > }
> >
> > +static unsigned long rt_lock(void)
> > +{
No, we don't name it rt_lock(). We have local_lock() for this exact
reason. And migrate_disable() does not protect against re-entry of the
function on the same CPU, while local_irq_save() does.
> > +#ifdef CONFIG_PREEMPT_RT
> > + migrate_disable();
> > + return 0;
> > +#else
> > + unsigned long flags = 0;
> > +
> > + local_irq_save(flags);
> > + return flags;
> > +#endif
> > +}
> > +
> > +static void rt_unlock(unsigned long flags)
> > +{
> > +#ifdef CONFIG_PREEMPT_RT
> > + migrate_enable();
> > +#else
> > + local_irq_restore(flags);
> > +#endif
> > +}
> > +
> > static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
> > enum node_stat_item idx, int nr)
> > {
Sebastian
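For context, on PREEMPT_RT a local_lock_t is backed by a per-CPU spinlock_t,
so taking it both keeps the task on the CPU and serializes against another
task entering the same section on that CPU. Roughly (a simplified sketch of
include/linux/local_lock_internal.h, with lockdep annotations omitted and an
illustrative name):

#ifdef CONFIG_PREEMPT_RT
/* sketch: local_lock_t is a per-CPU spinlock_t (a sleeping lock on RT) */
#define local_lock_sketch(lock)					\
	do {							\
		migrate_disable();				\
		spin_lock(this_cpu_ptr(lock));			\
	} while (0)
#else
/* !RT sketch: disabling preemption (or irqs, for _irqsave) suffices */
#define local_lock_sketch(lock)					\
	do {							\
		preempt_disable();				\
	} while (0)
#endif

That per-CPU spin_lock() is what excludes a second task re-entering the
section on the same CPU; bare migrate_disable() alone does not.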
On Fri, Mar 14, 2025 at 12:58:02PM +0100, Sebastian Andrzej Siewior wrote:
> On 2025-03-14 11:54:34 [+0100], Vlastimil Babka wrote:
> > On 3/14/25 07:15, Shakeel Butt wrote:
> > > Let's switch all the places that acquire and release the memcg_stock lock
> > > to no longer disable and enable irqs. There are still two functions (i.e.
> > > mod_objcg_state() and drain_obj_stock()) which need to disable irqs to
> > > update the stats on non-RT kernels. For now, add a simple wrapper for
> > > that functionality.
> >
> > BTW, which part of __mod_objcg_mlstate() really needs disabled irqs and not
> > just preemption? I see it does rcu_read_lock() anyway, which disables
> > preemption. Then in __mod_memcg_lruvec_state() we do some __this_cpu_add()
> > updates. I think these also are fine with just disabled preemption as they
> > are atomic vs irqs (but don't need LOCK prefix to be atomic vs other cpus
> > updates).
>
> __this_cpu_add() is not safe if it is also used in interrupt context. Some
> architectures (not x86) implement it as read, add, write.
> this_cpu_add() does the same but disables interrupts during the
> operation.
> So __this_cpu_add() should not be used if interrupts are not disabled
> and a modification can happen from interrupt context.
So, if I use this_cpu_add() instead of __this_cpu_add() in
__mod_memcg_state(), __mod_memcg_lruvec_state(), __count_memcg_events()
then I can call these functions without disabling interrupts. Also
this_cpu_add() does not disable interrupts for x86 and arm64, correct?
For x86 and arm64, can I assume that the cost of this_cpu_add() is the
same as __this_cpu_add()?
>
> > Is it just memcg_rstat_updated() which does READ_ONCE/WRITE_ONCE? Could we
> > perhaps just change it to operations where disabled preemption is enough?
Yes, I will look into it.
> >
> > > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> …
> > > @@ -2757,6 +2745,28 @@ static void replace_stock_objcg(struct memcg_stock_pcp *stock,
> > > WRITE_ONCE(stock->cached_objcg, objcg);
> > > }
> > >
> > > +static unsigned long rt_lock(void)
> > > +{
>
> No, we don't name it rt_lock(). We have local_lock() for this exact
> reason. And migrate_disable() does not protect vs re-enter of the
> function on the CPU while local_irq_save() does.
Thanks for the clarification. Here I will do nothing for RT kernels and
disable interrupts for non-RT kernels. (Let's see how the other
conversation goes; maybe we can remove the interrupt disabling
requirement.)
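Something along these lines (an illustrative sketch only, not a posted
patch; the helper names are made up):

/* Illustrative sketch, not from the posted patch. */
static unsigned long memcg_stats_lock(void)
{
#ifdef CONFIG_PREEMPT_RT
	/* stock_lock already provides the needed exclusion on RT */
	return 0;
#else
	unsigned long flags;

	local_irq_save(flags);
	return flags;
#endif
}

static void memcg_stats_unlock(unsigned long flags)
{
#ifndef CONFIG_PREEMPT_RT
	local_irq_restore(flags);
#endif
}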
On 2025-03-14 08:55:51 [-0700], Shakeel Butt wrote:
…
> So, if I use this_cpu_add() instead of __this_cpu_add() in
> __mod_memcg_state(), __mod_memcg_lruvec_state(), __count_memcg_events()
> then I can call these functions without disabling interrupts. Also
> this_cpu_add() does not disable interrupts for x86 and arm64, correct?
> For x86 and arm64, can I assume that the cost of this_cpu_add() is the
> same as __this_cpu_add()?

On arm64, __this_cpu_add() will "load, add, store". Preemptible.
this_cpu_add() will "disable preemption, atomic-load, add, atomic-store (or
start over with the atomic-load); if it succeeded, enable preemption and
move on".

So no, this is not the same. On x86 it is possible to increment a memory
value directly with one opcode, so you get preempted either before or
after that operation.

Sebastian
On Fri, Mar 14, 2025 at 05:42:34PM +0100, Sebastian Andrzej Siewior wrote:
…
> > So, if I use this_cpu_add() instead of __this_cpu_add() in
> > __mod_memcg_state(), __mod_memcg_lruvec_state(), __count_memcg_events()
> > then I can call these functions without disabling interrupts. Also
> > this_cpu_add() does not disable interrupts for x86 and arm64, correct?
> > For x86 and arm64, can I assume that the cost of this_cpu_add() is the
> > same as __this_cpu_add()?
>
> On arm64, __this_cpu_add() will "load, add, store". Preemptible.
> this_cpu_add() will "disable preemption, atomic-load, add, atomic-store (or
> start over with the atomic-load); if it succeeded, enable preemption and
> move on".

So, this_cpu_add() on arm64 is not protected against interrupts but is
protected against preemption. We have the following comment in
include/linux/percpu-defs.h. Is this not true anymore?

/*
 * Operations with implied preemption/interrupt protection.  These
 * operations can be used without worrying about preemption or interrupt.
 */
...
#define this_cpu_add(pcp, val)	__pcpu_size_call(this_cpu_add_, pcp, val)

> So no, this is not the same. On x86 it is possible to increment a memory
> value directly with one opcode, so you get preempted either before or
> after that operation.
>
> Sebastian
On 2025-03-14 10:02:47 [-0700], Shakeel Butt wrote:
…
> > On arm64, __this_cpu_add() will "load, add, store". Preemptible.
> > this_cpu_add() will "disable preemption, atomic-load, add, atomic-store (or
> > start over with the atomic-load); if it succeeded, enable preemption and
> > move on".
>
> So, this_cpu_add() on arm64 is not protected against interrupts but is
> protected against preemption. We have the following comment in
> include/linux/percpu-defs.h. Is this not true anymore?

It performs an atomic update. So it loads exclusive from memory and then
stores conditionally, if the exclusive monitor did not observe another
access to this address. Disabling preemption is only done to ensure that
the operation happens on the local CPU and the task does not get moved to
another CPU during the operation. A concurrent update to the same memory
address from an interrupt will be caught by the exclusive monitor.

The reason to remain on the same CPU is probably to ensure that
__this_cpu_add() in an IRQ-off region does not clash with an atomic update
performed elsewhere.

While looking at it, there is also the LSE extension which results in a
single add instruction which is atomic.

Sebastian
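Conceptually (pseudo-code only, not the actual arm64 implementation;
load_exclusive()/store_exclusive() are made-up stand-ins for ldxr/stxr), the
LL/SC based this_cpu_add() behaves like:

	/* pseudo-code sketch of an LL/SC based per-CPU add */
	preempt_disable();			/* stay on this CPU's counter */
	do {
		old = load_exclusive(addr);	/* ldxr: arms the exclusive monitor */
		new = old + val;
	} while (!store_exclusive(addr, new));	/* stxr fails if the monitor was
						   cleared in the meantime (e.g. by
						   an interrupt's update), so retry */
	preempt_enable();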
On Fri, Mar 14, 2025 at 10:02:47AM -0700, Shakeel Butt wrote:
…
> So, this_cpu_add() on arm64 is not protected against interrupts but is
> protected against preemption. We have the following comment in
> include/linux/percpu-defs.h. Is this not true anymore?
>
> /*
>  * Operations with implied preemption/interrupt protection.  These
>  * operations can be used without worrying about preemption or interrupt.
>  */
> ...
> #define this_cpu_add(pcp, val)	__pcpu_size_call(this_cpu_add_, pcp, val)

Just got clarification from Johannes & Tejun that this_cpu_add() is indeed
safe against irqs on arm64 as well. Basically arm64 uses a loop of
Load-Exclusive and Store-Exclusive instructions to protect against irqs.
It is defined in the __PERCPU_OP_CASE() macro in
arch/arm64/include/asm/percpu.h.