From: Shakeel Butt <shakeel.butt@linux.dev>
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
 Alexei Starovoitov, linux-mm@kvack.org, cgroups@vger.kernel.org,
 bpf@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team
Subject: [PATCH v2 1/3] memcg: separate local_trylock for memcg and obj
Date: Thu, 1 May 2025 17:17:40 -0700
Message-ID: <20250502001742.3087558-2-shakeel.butt@linux.dev>
In-Reply-To: <20250502001742.3087558-1-shakeel.butt@linux.dev>
References: <20250502001742.3087558-1-shakeel.butt@linux.dev>

The per-cpu stock_lock protects the cached memcg and the cached objcg
along with their respective fields. However, there is no dependency
between those fields, and it is better to have fine-grained, separate
locks for the cached memcg and the cached objcg. Decoupling the locks
allows the memcg charge cache and the objcg charge cache to be made
nmi-safe independently.

At the moment, the memcg charge cache is already nmi-safe, and this
decoupling will also allow the memcg charge cache to work without
disabling irqs.
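
The pattern this patch moves to can be summarized with a minimal,
self-contained sketch (illustrative only, not part of the patch; the
names two_caches, pcp_caches and use_cache_a are made up): one per-cpu
structure carrying two independent local_trylock_t regions, so that a
trylock failure on one cache never blocks users of the other.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct two_caches {
	local_trylock_t a_lock;		/* protects a_val only */
	int a_val;

	local_trylock_t b_lock;		/* protects b_val only */
	int b_val;
};

static DEFINE_PER_CPU_ALIGNED(struct two_caches, pcp_caches) = {
	.a_lock = INIT_LOCAL_TRYLOCK(a_lock),
	.b_lock = INIT_LOCAL_TRYLOCK(b_lock),
};

/* Touch cache A only; never spins, so usable even where spinning is not. */
static bool use_cache_a(int delta)
{
	struct two_caches *c;
	unsigned long flags;

	if (!local_trylock_irqsave(&pcp_caches.a_lock, flags))
		return false;		/* caller falls back to its slow path */

	c = this_cpu_ptr(&pcp_caches);
	c->a_val += delta;		/* b_lock and b_val remain free */

	local_unlock_irqrestore(&pcp_caches.a_lock, flags);
	return true;
}
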
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
Changes since v1:
- Drop usage of preempt_disable() as suggested by Vlastimil.

 mm/memcontrol.c | 51 ++++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0d42699bb564..14714e1d36e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1806,13 +1806,14 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
  */
 #define NR_MEMCG_STOCK 7
 struct memcg_stock_pcp {
-	local_trylock_t stock_lock;
+	local_trylock_t memcg_lock;
 	uint8_t nr_pages[NR_MEMCG_STOCK];
 	struct mem_cgroup *cached[NR_MEMCG_STOCK];
 
+	local_trylock_t obj_lock;
+	unsigned int nr_bytes;
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
-	unsigned int nr_bytes;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
 
@@ -1821,7 +1822,8 @@ struct memcg_stock_pcp {
 #define FLUSHING_CACHED_CHARGE	0
 };
 static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
-	.stock_lock = INIT_LOCAL_TRYLOCK(stock_lock),
+	.memcg_lock = INIT_LOCAL_TRYLOCK(memcg_lock),
+	.obj_lock = INIT_LOCAL_TRYLOCK(obj_lock),
 };
 static DEFINE_MUTEX(percpu_charge_mutex);
 
@@ -1854,8 +1856,8 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		return ret;
 
 	if (gfpflags_allow_spinning(gfp_mask))
-		local_lock_irqsave(&memcg_stock.stock_lock, flags);
-	else if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags))
+		local_lock_irqsave(&memcg_stock.memcg_lock, flags);
+	else if (!local_trylock_irqsave(&memcg_stock.memcg_lock, flags))
 		return ret;
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1872,7 +1874,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		break;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 
 	return ret;
 }
@@ -1918,19 +1920,19 @@ static void drain_local_stock(struct work_struct *dummy)
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
 
-	/*
-	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
-	 * drain_stock races is that we always operate on local CPU stock
-	 * here with IRQ disabled
-	 */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	if (WARN_ONCE(!in_task(), "drain in non-task context"))
+		return;
 
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 	stock = this_cpu_ptr(&memcg_stock);
 	drain_obj_stock(stock);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+
+	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
+	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock_fully(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
-
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 }
 
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -1953,10 +1955,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
 		/*
 		 * In case of larger than batch refill or unlikely failure to
-		 * lock the percpu stock_lock, uncharge memcg directly.
+		 * lock the percpu memcg_lock, uncharge memcg directly.
 		 */
 		memcg_uncharge(memcg, nr_pages);
 		return;
@@ -1988,7 +1990,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		WRITE_ONCE(stock->nr_pages[i], nr_pages);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
 }
 
 static bool is_drain_needed(struct memcg_stock_pcp *stock,
@@ -2063,11 +2065,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 
 	stock = &per_cpu(memcg_stock, cpu);
 
-	/* drain_obj_stock requires stock_lock */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	/* drain_obj_stock requires obj_lock */
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
+	/* no need for the local lock */
 	drain_stock_fully(stock);
 
 	return 0;
@@ -2920,7 +2923,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	unsigned long flags;
 	bool ret = false;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
@@ -2931,7 +2934,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
 	return ret;
 }
@@ -3020,7 +3023,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	unsigned long flags;
 	unsigned int nr_pages = 0;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	local_lock_irqsave(&memcg_stock.obj_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
@@ -3042,7 +3045,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
-- 
2.47.1
From: Shakeel Butt <shakeel.butt@linux.dev>
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
 Alexei Starovoitov, linux-mm@kvack.org, cgroups@vger.kernel.org,
 bpf@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team,
 Vlastimil Babka
Subject: [PATCH v2 2/3] memcg: completely decouple memcg and obj stocks
Date: Thu, 1 May 2025 17:17:41 -0700
Message-ID: <20250502001742.3087558-3-shakeel.butt@linux.dev>
In-Reply-To: <20250502001742.3087558-1-shakeel.butt@linux.dev>
References: <20250502001742.3087558-1-shakeel.butt@linux.dev>

Let's completely decouple the memcg and obj per-cpu stocks. This
enables the memcg per-cpu stocks to be used without disabling irqs,
and it enables the obj stocks to be made nmi-safe independently, which
is required to make kmalloc/slab allocations safe from nmi context.
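
After the split, each stock carries its own lock, FLUSHING bit, and
work item, so the two caches are flushed and skipped independently. A
hedged sketch of that per-cpu drain pattern (the identifiers toy_stock,
toy_drain, toy_request_drain and TOY_FLUSHING are made up for
illustration; as in the patch, the work item must be set up with
INIT_WORK() during init):

#include <linux/bitops.h>
#include <linux/local_lock.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

#define TOY_FLUSHING	0

struct toy_stock {
	local_trylock_t lock;
	int cached;
	struct work_struct work;	/* INIT_WORK(&work, toy_drain) at boot */
	unsigned long flags;
};

static DEFINE_PER_CPU_ALIGNED(struct toy_stock, toy_stock_pcp) = {
	.lock = INIT_LOCAL_TRYLOCK(lock),
};

/* Runs on the target CPU and empties its local cache. */
static void toy_drain(struct work_struct *dummy)
{
	struct toy_stock *stock;
	unsigned long flags;

	local_lock_irqsave(&toy_stock_pcp.lock, flags);
	stock = this_cpu_ptr(&toy_stock_pcp);
	stock->cached = 0;
	clear_bit(TOY_FLUSHING, &stock->flags);
	local_unlock_irqrestore(&toy_stock_pcp.lock, flags);
}

/* Request a drain on @cpu; at most one flush in flight per stock. */
static void toy_request_drain(int cpu)
{
	struct toy_stock *stock = &per_cpu(toy_stock_pcp, cpu);

	if (!test_and_set_bit(TOY_FLUSHING, &stock->flags))
		schedule_work_on(cpu, &stock->work);
}
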
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka
---
 mm/memcontrol.c | 151 +++++++++++++++++++++++++++++-------------------
 1 file changed, 93 insertions(+), 58 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 14714e1d36e9..cd81c70d144b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1804,13 +1804,23 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
  * The value of NR_MEMCG_STOCK is selected to keep the cached memcgs and their
  * nr_pages in a single cacheline. This may change in future.
  */
+#define FLUSHING_CACHED_CHARGE	0
 #define NR_MEMCG_STOCK 7
 struct memcg_stock_pcp {
-	local_trylock_t memcg_lock;
+	local_trylock_t lock;
 	uint8_t nr_pages[NR_MEMCG_STOCK];
 	struct mem_cgroup *cached[NR_MEMCG_STOCK];
 
-	local_trylock_t obj_lock;
+	struct work_struct work;
+	unsigned long flags;
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
+	.lock = INIT_LOCAL_TRYLOCK(lock),
+};
+
+struct obj_stock_pcp {
+	local_trylock_t lock;
 	unsigned int nr_bytes;
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
@@ -1819,16 +1829,16 @@ struct memcg_stock_pcp {
 
 	struct work_struct work;
 	unsigned long flags;
-#define FLUSHING_CACHED_CHARGE	0
 };
-static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
-	.memcg_lock = INIT_LOCAL_TRYLOCK(memcg_lock),
-	.obj_lock = INIT_LOCAL_TRYLOCK(obj_lock),
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_stock_pcp, obj_stock) = {
+	.lock = INIT_LOCAL_TRYLOCK(lock),
 };
+
 static DEFINE_MUTEX(percpu_charge_mutex);
 
-static void drain_obj_stock(struct memcg_stock_pcp *stock);
-static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+static void drain_obj_stock(struct obj_stock_pcp *stock);
+static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg);
 
 /**
@@ -1856,8 +1866,8 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		return ret;
 
 	if (gfpflags_allow_spinning(gfp_mask))
-		local_lock_irqsave(&memcg_stock.memcg_lock, flags);
-	else if (!local_trylock_irqsave(&memcg_stock.memcg_lock, flags))
+		local_lock_irqsave(&memcg_stock.lock, flags);
+	else if (!local_trylock_irqsave(&memcg_stock.lock, flags))
 		return ret;
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1874,7 +1884,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		break;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
 
 	return ret;
 }
@@ -1915,7 +1925,7 @@ static void drain_stock_fully(struct memcg_stock_pcp *stock)
 		drain_stock(stock, i);
 }
 
-static void drain_local_stock(struct work_struct *dummy)
+static void drain_local_memcg_stock(struct work_struct *dummy)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
@@ -1923,16 +1933,30 @@ static void drain_local_stock(struct work_struct *dummy)
 	if (WARN_ONCE(!in_task(), "drain in non-task context"))
 		return;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
-	stock = this_cpu_ptr(&memcg_stock);
-	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&memcg_stock.lock, flags);
 
-	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
 	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock_fully(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
+}
+
+static void drain_local_obj_stock(struct work_struct *dummy)
+{
+	struct obj_stock_pcp *stock;
+	unsigned long flags;
+
+	if (WARN_ONCE(!in_task(), "drain in non-task context"))
+		return;
+
+	local_lock_irqsave(&obj_stock.lock, flags);
+
+	stock = this_cpu_ptr(&obj_stock);
+	drain_obj_stock(stock);
+	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 }
 
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -1955,10 +1979,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
+	    !local_trylock_irqsave(&memcg_stock.lock, flags)) {
 		/*
 		 * In case of larger than batch refill or unlikely failure to
-		 * lock the percpu memcg_lock, uncharge memcg directly.
+		 * lock the percpu memcg_stock.lock, uncharge memcg directly.
 		 */
 		memcg_uncharge(memcg, nr_pages);
 		return;
@@ -1990,23 +2014,17 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		WRITE_ONCE(stock->nr_pages[i], nr_pages);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
 }
 
-static bool is_drain_needed(struct memcg_stock_pcp *stock,
-			    struct mem_cgroup *root_memcg)
+static bool is_memcg_drain_needed(struct memcg_stock_pcp *stock,
+				  struct mem_cgroup *root_memcg)
 {
 	struct mem_cgroup *memcg;
 	bool flush = false;
 	int i;
 
 	rcu_read_lock();
-
-	if (obj_stock_flush_required(stock, root_memcg)) {
-		flush = true;
-		goto out;
-	}
-
 	for (i = 0; i < NR_MEMCG_STOCK; ++i) {
 		memcg = READ_ONCE(stock->cached[i]);
 		if (!memcg)
@@ -2018,7 +2036,6 @@ static bool is_drain_needed(struct memcg_stock_pcp *stock,
 			break;
 		}
 	}
-out:
 	rcu_read_unlock();
 	return flush;
 }
@@ -2043,15 +2060,27 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 	migrate_disable();
 	curcpu = smp_processor_id();
 	for_each_online_cpu(cpu) {
-		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
-		bool flush = is_drain_needed(stock, root_memcg);
+		struct memcg_stock_pcp *memcg_st = &per_cpu(memcg_stock, cpu);
+		struct obj_stock_pcp *obj_st = &per_cpu(obj_stock, cpu);
 
-		if (flush &&
-		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+		if (!test_bit(FLUSHING_CACHED_CHARGE, &memcg_st->flags) &&
+		    is_memcg_drain_needed(memcg_st, root_memcg) &&
+		    !test_and_set_bit(FLUSHING_CACHED_CHARGE,
+				      &memcg_st->flags)) {
 			if (cpu == curcpu)
-				drain_local_stock(&stock->work);
+				drain_local_memcg_stock(&memcg_st->work);
 			else if (!cpu_is_isolated(cpu))
-				schedule_work_on(cpu, &stock->work);
+				schedule_work_on(cpu, &memcg_st->work);
+		}
+
+		if (!test_bit(FLUSHING_CACHED_CHARGE, &obj_st->flags) &&
+		    obj_stock_flush_required(obj_st, root_memcg) &&
+		    !test_and_set_bit(FLUSHING_CACHED_CHARGE,
+				      &obj_st->flags)) {
+			if (cpu == curcpu)
+				drain_local_obj_stock(&obj_st->work);
+			else if (!cpu_is_isolated(cpu))
+				schedule_work_on(cpu, &obj_st->work);
 		}
 	}
 	migrate_enable();
@@ -2060,18 +2089,18 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *obj_st;
 	unsigned long flags;
 
-	stock = &per_cpu(memcg_stock, cpu);
+	obj_st = &per_cpu(obj_stock, cpu);
 
-	/* drain_obj_stock requires obj_lock */
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
-	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	/* drain_obj_stock requires objstock.lock */
+	local_lock_irqsave(&obj_stock.lock, flags);
+	drain_obj_stock(obj_st);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	/* no need for the local lock */
-	drain_stock_fully(stock);
+	drain_stock_fully(&per_cpu(memcg_stock, cpu));
 
 	return 0;
 }
@@ -2868,7 +2897,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 }
 
 static void __account_obj_stock(struct obj_cgroup *objcg,
-				struct memcg_stock_pcp *stock, int nr,
+				struct obj_stock_pcp *stock, int nr,
 				struct pglist_data *pgdat, enum node_stat_item idx)
 {
 	int *bytes;
@@ -2919,13 +2948,13 @@ static void __account_obj_stock(struct obj_cgroup *objcg,
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			      struct pglist_data *pgdat, enum node_stat_item idx)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *stock;
 	unsigned long flags;
 	bool ret = false;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&obj_stock.lock, flags);
 
-	stock = this_cpu_ptr(&memcg_stock);
+	stock = this_cpu_ptr(&obj_stock);
 	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
@@ -2934,12 +2963,12 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	return ret;
 }
 
-static void drain_obj_stock(struct memcg_stock_pcp *stock)
+static void drain_obj_stock(struct obj_stock_pcp *stock)
 {
 	struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
 
@@ -3000,32 +3029,35 @@ static void drain_obj_stock(struct memcg_stock_pcp *stock)
 	obj_cgroup_put(old);
 }
 
-static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg)
 {
 	struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
 	struct mem_cgroup *memcg;
+	bool flush = false;
 
+	rcu_read_lock();
 	if (objcg) {
 		memcg = obj_cgroup_memcg(objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
-			return true;
+			flush = true;
 	}
+	rcu_read_unlock();
 
-	return false;
+	return flush;
 }
 
 static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			     bool allow_uncharge, int nr_acct,
 			     struct pglist_data *pgdat, enum node_stat_item idx)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *stock;
 	unsigned long flags;
 	unsigned int nr_pages = 0;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&obj_stock.lock, flags);
 
-	stock = this_cpu_ptr(&memcg_stock);
+	stock = this_cpu_ptr(&obj_stock);
 	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
 		drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
@@ -3045,7 +3077,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -5164,9 +5196,12 @@ int __init mem_cgroup_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);
 
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
 		INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
-			  drain_local_stock);
+			  drain_local_memcg_stock);
+		INIT_WORK(&per_cpu_ptr(&obj_stock, cpu)->work,
+			  drain_local_obj_stock);
+	}
 
 	memcg_size = struct_size_t(struct mem_cgroup, nodeinfo, nr_node_ids);
 	memcg_cachep = kmem_cache_create("mem_cgroup", memcg_size, 0,
-- 
2.47.1
From: Shakeel Butt <shakeel.butt@linux.dev>
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
 Alexei Starovoitov, linux-mm@kvack.org, cgroups@vger.kernel.org,
 bpf@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team,
 Vlastimil Babka
Subject: [PATCH v2 3/3] memcg: no irq disable for memcg stock lock
Date: Thu, 1 May 2025 17:17:42 -0700
Message-ID: <20250502001742.3087558-4-shakeel.butt@linux.dev>
In-Reply-To: <20250502001742.3087558-1-shakeel.butt@linux.dev>
References: <20250502001742.3087558-1-shakeel.butt@linux.dev>

There is no need to disable irqs to use the memcg per-cpu stock, so
let's just not do that. One consequence of this change: if the kernel,
while in task context, holds the memcg stock lock and that cpu gets
interrupted, then memcg charges on that cpu from irq context will take
the slow path of memcg charging. However, that should be super rare and
fine in general.
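
The behaviour this relies on can be shown with a short sketch
(illustrative names only, not from the patch): local_lock() on a
local_trylock_t disables preemption but not irqs, so an interrupt can
arrive while the lock is held; a charger running in that interrupt must
use local_trylock() and fall back to the slow path when it loses the
race against the interrupted task.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct toy_cache {
	local_trylock_t lock;
	int pages;
};

static DEFINE_PER_CPU_ALIGNED(struct toy_cache, toy_cache_pcp) = {
	.lock = INIT_LOCAL_TRYLOCK(lock),
};

static bool toy_consume(int nr, bool can_spin)
{
	struct toy_cache *c;
	bool ret = false;

	if (can_spin)
		local_lock(&toy_cache_pcp.lock);	/* task context */
	else if (!local_trylock(&toy_cache_pcp.lock))
		return false;	/* irq hit a busy lock: take the slow path */

	c = this_cpu_ptr(&toy_cache_pcp);
	if (c->pages >= nr) {
		c->pages -= nr;
		ret = true;
	}

	local_unlock(&toy_cache_pcp.lock);
	return ret;
}
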
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka
---
 mm/memcontrol.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cd81c70d144b..f8b9c7aa6771 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1858,7 +1858,6 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 {
 	struct memcg_stock_pcp *stock;
 	uint8_t stock_pages;
-	unsigned long flags;
 	bool ret = false;
 	int i;
 
@@ -1866,8 +1865,8 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		return ret;
 
 	if (gfpflags_allow_spinning(gfp_mask))
-		local_lock_irqsave(&memcg_stock.lock, flags);
-	else if (!local_trylock_irqsave(&memcg_stock.lock, flags))
+		local_lock(&memcg_stock.lock);
+	else if (!local_trylock(&memcg_stock.lock))
 		return ret;
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1884,7 +1883,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
 		break;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.lock, flags);
+	local_unlock(&memcg_stock.lock);
 
 	return ret;
 }
@@ -1928,18 +1927,17 @@ static void drain_stock_fully(struct memcg_stock_pcp *stock)
 static void drain_local_memcg_stock(struct work_struct *dummy)
 {
 	struct memcg_stock_pcp *stock;
-	unsigned long flags;
 
 	if (WARN_ONCE(!in_task(), "drain in non-task context"))
 		return;
 
-	local_lock_irqsave(&memcg_stock.lock, flags);
+	local_lock(&memcg_stock.lock);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock_fully(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
-	local_unlock_irqrestore(&memcg_stock.lock, flags);
+	local_unlock(&memcg_stock.lock);
 }
 
 static void drain_local_obj_stock(struct work_struct *dummy)
@@ -1964,7 +1962,6 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	struct memcg_stock_pcp *stock;
 	struct mem_cgroup *cached;
 	uint8_t stock_pages;
-	unsigned long flags;
 	bool success = false;
 	int empty_slot = -1;
 	int i;
@@ -1979,7 +1976,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.lock, flags)) {
+	    !local_trylock(&memcg_stock.lock)) {
 		/*
 		 * In case of larger than batch refill or unlikely failure to
 		 * lock the percpu memcg_stock.lock, uncharge memcg directly.
@@ -2014,7 +2011,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		WRITE_ONCE(stock->nr_pages[i], nr_pages);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.lock, flags);
+	local_unlock(&memcg_stock.lock);
 }
 
 static bool is_memcg_drain_needed(struct memcg_stock_pcp *stock,
-- 
2.47.1