[PATCH 2/4] memcg: separate local_trylock for memcg and obj

Posted by Shakeel Butt 9 months, 2 weeks ago
The per-cpu stock_lock protects the cached memcg and the cached objcg
and their respective fields. However, there is no dependency between
these fields, so it is better to have fine-grained separate locks for
the cached memcg and the cached objcg. This decoupling of locks allows
the memcg charge cache and the objcg charge cache to be made nmi-safe
independently.

At the moment, the memcg charge cache is already nmi-safe, and this
decoupling will allow the memcg charge cache to work without disabling
irqs.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/memcontrol.c | 52 +++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 23 deletions(-)
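
A note for reviewers (kept below the --- so it stays out of the commit
message): after this split, memcg_lock only covers cached[] and
nr_pages[], while obj_lock only covers cached_objcg, cached_pgdat,
nr_bytes and the nr_slab_* counters, so the two fast paths never take
each other's lock. A rough sketch of the resulting pattern, mirroring
consume_stock() and consume_obj_stock() below (illustration only, not
extra code added by this patch):

	/* page-charge side: memcg_lock only */
	if (local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
		stock = this_cpu_ptr(&memcg_stock);
		/* consume from stock->cached[] / stock->nr_pages[] */
		local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
	}

	/* objcg byte side: obj_lock only, independent of the above */
	local_lock_irqsave(&memcg_stock.obj_lock, flags);
	stock = this_cpu_ptr(&memcg_stock);
	/* consume from stock->nr_bytes for stock->cached_objcg */
	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);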

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 40d0838d88bc..460634e8435f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1777,13 +1777,14 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
 
 #define NR_MEMCG_STOCK 7
 struct memcg_stock_pcp {
-	local_trylock_t stock_lock;
+	local_trylock_t memcg_lock;
 	uint8_t nr_pages[NR_MEMCG_STOCK];
 	struct mem_cgroup *cached[NR_MEMCG_STOCK];
 
+	local_trylock_t obj_lock;
+	unsigned int nr_bytes;
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
-	unsigned int nr_bytes;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
 
@@ -1792,7 +1793,8 @@ struct memcg_stock_pcp {
 #define FLUSHING_CACHED_CHARGE	0
 };
 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
-	.stock_lock = INIT_LOCAL_TRYLOCK(stock_lock),
+	.memcg_lock = INIT_LOCAL_TRYLOCK(memcg_lock),
+	.obj_lock = INIT_LOCAL_TRYLOCK(obj_lock),
 };
 static DEFINE_MUTEX(percpu_charge_mutex);
 
@@ -1820,7 +1822,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	int i;
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.stock_lock, flags))
+	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags))
 		return ret;
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1837,7 +1839,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		break;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 
 	return ret;
 }
@@ -1883,19 +1885,22 @@ static void drain_local_stock(struct work_struct *dummy)
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
 
-	/*
-	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
-	 * drain_stock races is that we always operate on local CPU stock
-	 * here with IRQ disabled
-	 */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	if (WARN_ONCE(!in_task(), "drain in non-task context"))
+		return;
 
+	preempt_disable();
 	stock = this_cpu_ptr(&memcg_stock);
+
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 	drain_obj_stock(stock);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+
+	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
 	drain_stock_fully(stock);
-	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+	preempt_enable();
 }
 
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -1918,10 +1923,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
 		/*
 		 * In case of larger than batch refill or unlikely failure to
-		 * lock the percpu stock_lock, uncharge memcg directly.
+		 * lock the percpu memcg_lock, uncharge memcg directly.
 		 */
 		memcg_uncharge(memcg, nr_pages);
 		return;
@@ -1953,7 +1958,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		WRITE_ONCE(stock->nr_pages[i], nr_pages);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 }
 
 static bool is_drain_needed(struct memcg_stock_pcp *stock,
@@ -2028,11 +2033,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 
 	stock = &per_cpu(memcg_stock, cpu);
 
-	/* drain_obj_stock requires stock_lock */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	/* drain_obj_stock requires obj_lock */
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
+	/* no need for the local lock */
 	drain_stock_fully(stock);
 
 	return 0;
@@ -2885,7 +2891,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	unsigned long flags;
 	bool ret = false;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
@@ -2896,7 +2902,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
 	return ret;
 }
@@ -2985,7 +2991,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	unsigned long flags;
 	unsigned int nr_pages = 0;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
@@ -3007,7 +3013,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
-- 
2.47.1
Re: [PATCH 2/4] memcg: separate local_trylock for memcg and obj
Posted by Vlastimil Babka 9 months, 1 week ago
On 4/30/25 01:04, Shakeel Butt wrote:
> The per-cpu stock_lock protects the cached memcg and the cached objcg
> and their respective fields. However, there is no dependency between
> these fields, so it is better to have fine-grained separate locks for
> the cached memcg and the cached objcg. This decoupling of locks allows
> the memcg charge cache and the objcg charge cache to be made nmi-safe
> independently.
> 
> At the moment, the memcg charge cache is already nmi-safe, and this
> decoupling will allow the memcg charge cache to work without disabling
> irqs.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/memcontrol.c | 52 +++++++++++++++++++++++++++----------------------
>  1 file changed, 29 insertions(+), 23 deletions(-)

> @@ -1883,19 +1885,22 @@ static void drain_local_stock(struct work_struct *dummy)
>  	struct memcg_stock_pcp *stock;
>  	unsigned long flags;
>  
> -	/*
> -	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
> -	 * drain_stock races is that we always operate on local CPU stock
> -	 * here with IRQ disabled
> -	 */
> -	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> +	if (WARN_ONCE(!in_task(), "drain in non-task context"))
> +		return;
>  
> +	preempt_disable();
>  	stock = this_cpu_ptr(&memcg_stock);
> +
> +	local_lock_irqsave(&memcg_stock.obj_lock, flags);
>  	drain_obj_stock(stock);
> +	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
> +
> +	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
>  	drain_stock_fully(stock);
> -	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
> +	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
>  
> -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> +	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
> +	preempt_enable();

This usage of preempt_disable() looks rather weird and makes RT unhappy as
the local lock is a mutex, so it gives you this:

BUG: sleeping function called from invalid context at
kernel/locking/spinlock_rt.c:48

I know the next patch removes it again but for bisectability purposes it
should be avoided. Instead of preempt_disable() we can extend the local lock
scope here?
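
Something like this perhaps - untested, just to illustrate extending the
lock scope so that this_cpu_ptr() and the clear_bit() stay under the
respective local lock instead of a bare preempt_disable():

	if (WARN_ONCE(!in_task(), "drain in non-task context"))
		return;

	local_lock_irqsave(&memcg_stock.obj_lock, flags);
	stock = this_cpu_ptr(&memcg_stock);
	drain_obj_stock(stock);
	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);

	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
	stock = this_cpu_ptr(&memcg_stock);
	drain_stock_fully(stock);
	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);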

>  }
>  
>  static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
> @@ -1918,10 +1923,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>  	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
>  
>  	if (nr_pages > MEMCG_CHARGE_BATCH ||
> -	    !local_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
> +	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
>  		/*
>  		 * In case of larger than batch refill or unlikely failure to
> -		 * lock the percpu stock_lock, uncharge memcg directly.
> +		 * lock the percpu memcg_lock, uncharge memcg directly.
>  		 */
>  		memcg_uncharge(memcg, nr_pages);
>  		return;
> @@ -1953,7 +1958,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>  		WRITE_ONCE(stock->nr_pages[i], nr_pages);
>  	}
>  
> -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> +	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
>  }
>  
>  static bool is_drain_needed(struct memcg_stock_pcp *stock,
> @@ -2028,11 +2033,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
>  
>  	stock = &per_cpu(memcg_stock, cpu);
>  
> -	/* drain_obj_stock requires stock_lock */
> -	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> +	/* drain_obj_stock requires obj_lock */
> +	local_lock_irqsave(&memcg_stock.obj_lock, flags);
>  	drain_obj_stock(stock);
> -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> +	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
>  
> +	/* no need for the local lock */
>  	drain_stock_fully(stock);
>  
>  	return 0;
> @@ -2885,7 +2891,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>  	unsigned long flags;
>  	bool ret = false;
>  
> -	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> +	local_lock_irqsave(&memcg_stock.obj_lock, flags);
>  
>  	stock = this_cpu_ptr(&memcg_stock);
>  	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
> @@ -2896,7 +2902,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>  			__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
>  	}
>  
> -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> +	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
>  
>  	return ret;
>  }
> @@ -2985,7 +2991,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>  	unsigned long flags;
>  	unsigned int nr_pages = 0;
>  
> -	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> +	local_lock_irqsave(&memcg_stock.obj_lock, flags);
>  
>  	stock = this_cpu_ptr(&memcg_stock);
>  	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
> @@ -3007,7 +3013,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
>  		stock->nr_bytes &= (PAGE_SIZE - 1);
>  	}
>  
> -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> +	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
>  
>  	if (nr_pages)
>  		obj_cgroup_uncharge_pages(objcg, nr_pages);
Re: [PATCH 2/4] memcg: separate local_trylock for memcg and obj
Posted by Shakeel Butt 9 months, 1 week ago
On Wed, Apr 30, 2025 at 01:42:47PM +0200, Vlastimil Babka wrote:
> On 4/30/25 01:04, Shakeel Butt wrote:
> > The per-cpu stock_lock protects the cached memcg and the cached objcg
> > and their respective fields. However, there is no dependency between
> > these fields, so it is better to have fine-grained separate locks for
> > the cached memcg and the cached objcg. This decoupling of locks allows
> > the memcg charge cache and the objcg charge cache to be made nmi-safe
> > independently.
> > 
> > At the moment, the memcg charge cache is already nmi-safe, and this
> > decoupling will allow the memcg charge cache to work without disabling
> > irqs.
> > 
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/memcontrol.c | 52 +++++++++++++++++++++++++++----------------------
> >  1 file changed, 29 insertions(+), 23 deletions(-)
> 
> > @@ -1883,19 +1885,22 @@ static void drain_local_stock(struct work_struct *dummy)
> >  	struct memcg_stock_pcp *stock;
> >  	unsigned long flags;
> >  
> > -	/*
> > -	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
> > -	 * drain_stock races is that we always operate on local CPU stock
> > -	 * here with IRQ disabled
> > -	 */
> > -	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> > +	if (WARN_ONCE(!in_task(), "drain in non-task context"))
> > +		return;
> >  
> > +	preempt_disable();
> >  	stock = this_cpu_ptr(&memcg_stock);
> > +
> > +	local_lock_irqsave(&memcg_stock.obj_lock, flags);
> >  	drain_obj_stock(stock);
> > +	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
> > +
> > +	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
> >  	drain_stock_fully(stock);
> > -	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
> > +	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
> >  
> > -	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> > +	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
> > +	preempt_enable();
> 
> This usage of preempt_disable() looks rather weird and makes RT unhappy as
> the local lock is a mutex, so it gives you this:
> 
> BUG: sleeping function called from invalid context at
> kernel/locking/spinlock_rt.c:48
> 
> I know the next patch removes it again but for bisectability purposes it
> should be avoided. Instead of preempt_disable() we can extend the local lock
> scope here?
> 

Indeed and thanks for the suggestion, will fix in v2.