From nobody Sun Feb 8 01:29:55 2026 Received: from out-184.mta0.migadu.com (out-184.mta0.migadu.com [91.218.175.184]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 859561F4168 for ; Wed, 14 May 2025 05:08:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.184 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199318; cv=none; b=NgHmk6pmLo0M/Wr6a+ADEvDQu+OqNjvFN+rFn+Qz4TUA8rRLoSzWxpvKxrSZbBstkpHXBxdd9CsedTDlNzT4b3UVFOHg4GZ6gJhP9MRMkeqQSiEJLssElRoL5FhYA6Fwj/F4IjQL4SDaCCL16cfmi8kU0HMVwDxqQpjZdCDtYo8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199318; c=relaxed/simple; bh=RG9eTvAXCp/BCxIURb/14CMPEMU/XhGJRwyRB7eMJ4Q=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=plIYA9Pz/KkGiPFemkMaT8TgJUPfeyjLfLBjXYu87JpejIacf0tTu9r+/yHZOOL9HKsNJB0XN57sooAH+4zDTneXcy9mUBBQM6amK9NYHFhT7DcKaubAnYYBSknSDmvgLDPgicT0BA05Xer7/gw9eHvtUnrC3UxU/O86hIqssIo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=clfLkUdZ; arc=none smtp.client-ip=91.218.175.184 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="clfLkUdZ" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199313; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=zTCWWrV0l4ME9vJBQdx9lnHLC7UBjeIHK4sO6n82O0w=; b=clfLkUdZQCeY4LqSnalnqCiuGz+eMS9VvbrRqDlJq//1jNxS3/xkzuZH0dAccVoNF2esh9 AoAfroMAbkmB8hJml6UIrG23bU6WvwkGvr/OeR1DT+8bEraK8BLD3NB2tddQZPLwAPo/7V ddblr4d4JckNJE3bSKj7MOy1KgFTcPw= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 1/7] memcg: memcg_rstat_updated re-entrant safe against irqs Date: Tue, 13 May 2025 22:08:07 -0700 Message-ID: <20250514050813.2526843-2-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" The function memcg_rstat_updated() is used to track the memcg stats updates for optimizing the flushes. At the moment, it is not re-entrant safe and its callers disable irqs before calling it. However, to achieve the goal of updating memcg stats without disabling irqs, memcg_rstat_updated() needs to be re-entrant safe against irqs. This patch makes memcg_rstat_updated() re-entrant safe using this_cpu_* ops. On archs with CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS, this patch also makes memcg_rstat_updated() nmi safe. 
Signed-off-by: Shakeel Butt Reviewed-by: Vlastimil Babka --- mm/memcontrol.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a713f160d669..cb10bcd1028d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -506,8 +506,8 @@ struct memcg_vmstats_percpu { unsigned int stats_updates; =20 /* Cached pointers for fast iteration in memcg_rstat_updated() */ - struct memcg_vmstats_percpu *parent; - struct memcg_vmstats *vmstats; + struct memcg_vmstats_percpu __percpu *parent_pcpu; + struct memcg_vmstats *vmstats; =20 /* The above should fit a single cacheline for memcg_rstat_updated() */ =20 @@ -589,32 +589,38 @@ static bool memcg_vmstats_needs_flush(struct memcg_vm= stats *vmstats) =20 static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) { - struct memcg_vmstats_percpu *statc; - int cpu =3D smp_processor_id(); + struct memcg_vmstats_percpu __percpu *statc_pcpu; + int cpu; unsigned int stats_updates; =20 if (!val) return; =20 + /* Don't assume callers have preemption disabled. */ + cpu =3D get_cpu(); + css_rstat_updated(&memcg->css, cpu); - statc =3D this_cpu_ptr(memcg->vmstats_percpu); - for (; statc; statc =3D statc->parent) { + statc_pcpu =3D memcg->vmstats_percpu; + for (; statc_pcpu; statc_pcpu =3D this_cpu_ptr(statc_pcpu)->parent_pcpu) { /* * If @memcg is already flushable then all its ancestors are * flushable as well and also there is no need to increase * stats_updates. 
*/ - if (memcg_vmstats_needs_flush(statc->vmstats)) + if (memcg_vmstats_needs_flush(this_cpu_ptr(statc_pcpu)->vmstats)) break; =20 - stats_updates =3D READ_ONCE(statc->stats_updates) + abs(val); - WRITE_ONCE(statc->stats_updates, stats_updates); + stats_updates =3D this_cpu_add_return(statc_pcpu->stats_updates, + abs(val)); if (stats_updates < MEMCG_CHARGE_BATCH) continue; =20 - atomic64_add(stats_updates, &statc->vmstats->stats_updates); - WRITE_ONCE(statc->stats_updates, 0); + stats_updates =3D this_cpu_xchg(statc_pcpu->stats_updates, 0); + if (stats_updates) + atomic64_add(stats_updates, + &this_cpu_ptr(statc_pcpu)->vmstats->stats_updates); } + put_cpu(); } =20 static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force) @@ -3716,7 +3722,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) =20 static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) { - struct memcg_vmstats_percpu *statc, *pstatc; + struct memcg_vmstats_percpu *statc, __percpu *pstatc_pcpu; struct mem_cgroup *memcg; int node, cpu; int __maybe_unused i; @@ -3747,9 +3753,9 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem= _cgroup *parent) =20 for_each_possible_cpu(cpu) { if (parent) - pstatc =3D per_cpu_ptr(parent->vmstats_percpu, cpu); + pstatc_pcpu =3D parent->vmstats_percpu; statc =3D per_cpu_ptr(memcg->vmstats_percpu, cpu); - statc->parent =3D parent ? pstatc : NULL; + statc->parent_pcpu =3D parent ? 
pstatc_pcpu : NULL; statc->vmstats =3D memcg->vmstats; } =20 --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-184.mta0.migadu.com (out-184.mta0.migadu.com [91.218.175.184]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 70B6E1F4CB3 for ; Wed, 14 May 2025 05:08:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.184 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199322; cv=none; b=Ye/WWCKvDdiYSM+WmWMq9PWqibkM8MLI74WrIVNuaS7i5agUykbhw9TB7QqfpqTilRvt1OLJ1xLI7EX/bZli//XU9AjjzJQwrfZgt6u9qY7evzU8fcym0xIc/PSLUouqS4o8LcvLDPpUw7Vr9ujqN+DIwxip8pm8AMGp04s7OLE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199322; c=relaxed/simple; bh=8jpCKcYevABsM1E+ovY62bcN7h6ecmAAnFb2kM2PaZY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jGqLaExHfsfwxLQzFCfYjuobA+SLB+GmcJuNIUjEIlmKe9IWqPaVdWC/e8UyuSqYpNyIlO/uaEzdIggez/qWNbB6KO/mkf/ZmFCvfZ17Sp8Pyv7l5gvvGKlSbcDV37jHHsiPe+GAnN5xpn80KTrijlf5YY83fk6pbD8SmSpJxKg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=Ufan84iQ; arc=none smtp.client-ip=91.218.175.184 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="Ufan84iQ" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199317; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=lT1lWRIc39oQfhab/AvhtsB7gjFHkHKvP88N+9WuUp8=; b=Ufan84iQx4kZ6ny1kDkEoY8jcItoUy+lGSmiHmEWOC/vnxckDXJ8pN7FdgVYDVZtg8IOYP WwRCYapOwVegFLk0yijnYzo88XArgShFZK0r+JpUQyd24hpzeJ9fo5Y9u7BOR4rGgJOjDR Otlz62CicA3jypXQ1jSx2lqykI4LCkk= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 2/7] memcg: move preempt disable to callers of memcg_rstat_updated Date: Tue, 13 May 2025 22:08:08 -0700 Message-ID: <20250514050813.2526843-3-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Let's move the explicit preempt disable code to the callers of memcg_rstat_updated(). Also remove memcg_stats_lock() and its related functions, which ensured that the callers of the stats update functions had disabled preemption; they are no longer needed because the stats update functions now disable preemption explicitly. 
Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- mm/memcontrol.c | 74 +++++++++++++------------------------------------ 1 file changed, 19 insertions(+), 55 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cb10bcd1028d..8c8e0e1acd71 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -558,47 +558,21 @@ static u64 flush_last_time; =20 #define FLUSH_TIME (2UL*HZ) =20 -/* - * Accessors to ensure that preemption is disabled on PREEMPT_RT because i= t can - * not rely on this as part of an acquired spinlock_t lock. These function= s are - * never used in hardirq context on PREEMPT_RT and therefore disabling pre= emtion - * is sufficient. - */ -static void memcg_stats_lock(void) -{ - preempt_disable_nested(); - VM_WARN_ON_IRQS_ENABLED(); -} - -static void __memcg_stats_lock(void) -{ - preempt_disable_nested(); -} - -static void memcg_stats_unlock(void) -{ - preempt_enable_nested(); -} - - static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats) { return atomic64_read(&vmstats->stats_updates) > MEMCG_CHARGE_BATCH * num_online_cpus(); } =20 -static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) +static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val, + int cpu) { struct memcg_vmstats_percpu __percpu *statc_pcpu; - int cpu; unsigned int stats_updates; =20 if (!val) return; =20 - /* Don't assume callers have preemption disabled. 
*/ - cpu =3D get_cpu(); - css_rstat_updated(&memcg->css, cpu); statc_pcpu =3D memcg->vmstats_percpu; for (; statc_pcpu; statc_pcpu =3D this_cpu_ptr(statc_pcpu)->parent_pcpu) { @@ -620,7 +594,6 @@ static inline void memcg_rstat_updated(struct mem_cgrou= p *memcg, int val) atomic64_add(stats_updates, &this_cpu_ptr(statc_pcpu)->vmstats->stats_updates); } - put_cpu(); } =20 static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force) @@ -718,6 +691,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum m= emcg_stat_item idx, int val) { int i =3D memcg_stats_index(idx); + int cpu; =20 if (mem_cgroup_disabled()) return; @@ -725,12 +699,14 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum= memcg_stat_item idx, if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, id= x)) return; =20 - memcg_stats_lock(); + cpu =3D get_cpu(); + __this_cpu_add(memcg->vmstats_percpu->state[i], val); val =3D memcg_state_val_in_pages(idx, val); - memcg_rstat_updated(memcg, val); + memcg_rstat_updated(memcg, val, cpu); trace_mod_memcg_state(memcg, idx, val); - memcg_stats_unlock(); + + put_cpu(); } =20 #ifdef CONFIG_MEMCG_V1 @@ -759,6 +735,7 @@ static void __mod_memcg_lruvec_state(struct lruvec *lru= vec, struct mem_cgroup_per_node *pn; struct mem_cgroup *memcg; int i =3D memcg_stats_index(idx); + int cpu; =20 if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, id= x)) return; @@ -766,24 +743,7 @@ static void __mod_memcg_lruvec_state(struct lruvec *lr= uvec, pn =3D container_of(lruvec, struct mem_cgroup_per_node, lruvec); memcg =3D pn->memcg; =20 - /* - * The caller from rmap relies on disabled preemption because they never - * update their counter from in-interrupt context. For these two - * counters we check that the update is never performed from an - * interrupt context while other caller need to have disabled interrupt. 
- */ - __memcg_stats_lock(); - if (IS_ENABLED(CONFIG_DEBUG_VM)) { - switch (idx) { - case NR_ANON_MAPPED: - case NR_FILE_MAPPED: - case NR_ANON_THPS: - WARN_ON_ONCE(!in_task()); - break; - default: - VM_WARN_ON_IRQS_ENABLED(); - } - } + cpu =3D get_cpu(); =20 /* Update memcg */ __this_cpu_add(memcg->vmstats_percpu->state[i], val); @@ -792,9 +752,10 @@ static void __mod_memcg_lruvec_state(struct lruvec *lr= uvec, __this_cpu_add(pn->lruvec_stats_percpu->state[i], val); =20 val =3D memcg_state_val_in_pages(idx, val); - memcg_rstat_updated(memcg, val); + memcg_rstat_updated(memcg, val, cpu); trace_mod_memcg_lruvec_state(memcg, idx, val); - memcg_stats_unlock(); + + put_cpu(); } =20 /** @@ -874,6 +835,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enu= m vm_event_item idx, unsigned long count) { int i =3D memcg_events_index(idx); + int cpu; =20 if (mem_cgroup_disabled()) return; @@ -881,11 +843,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, e= num vm_event_item idx, if (WARN_ONCE(BAD_STAT_IDX(i), "%s: missing stat item %d\n", __func__, id= x)) return; =20 - memcg_stats_lock(); + cpu =3D get_cpu(); + __this_cpu_add(memcg->vmstats_percpu->events[i], count); - memcg_rstat_updated(memcg, count); + memcg_rstat_updated(memcg, count, cpu); trace_count_memcg_events(memcg, idx, count); - memcg_stats_unlock(); + + put_cpu(); } =20 unsigned long memcg_events(struct mem_cgroup *memcg, int event) --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-182.mta1.migadu.com (out-182.mta1.migadu.com [95.215.58.182]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 616021F4192; Wed, 14 May 2025 05:08:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.182 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199335; cv=none; 
b=qunIBn8uC3oMW+3P2HzngnrUvedpKr1dQAGl9+aO9avC3wDWFrzgYENG+jx/wXicU8gcBdsRJlK4gxuMBsuVMhSZYXcM2K9mP0GVLh1C1V7+c4qhyA6Sg9YGCHi8+H3hICL4BJLDXiWBle+WY/u8tGMFUOG2WGE7EbqxWLVJR1o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199335; c=relaxed/simple; bh=PFkD0AMkM3GwtCEKLFsaTynSsPRuggxc5SknzYJ3S7E=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=KYZH5Cq38Gf0HB+gfeXYY7WC/Rt+JEdSiEKb3y2kQUBihnyK2rYPkYRSqpCUwEiRyAK7B4LT6drSIQzR9TP93sxSEu1WAQ4J24vF9riHWe9sM/GcZM7Dk2iXdiw2MONRcRriZmiTecIIb5DHKgwG7YR9OtYv90tOd7GL8ap0bFY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=YoFovARI; arc=none smtp.client-ip=95.215.58.182 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="YoFovARI" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199331; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=mzW+LD8ZEG60wZ0j6G3XrmFnJ+CfbtsHJ8eUjJxkklQ=; b=YoFovARIwG2eKpzspzAbbuE0fz7QeHW2KPhjX6GO+wXrmRlmIK9emr+bBbYFAc3dinKO2q 4I2X1GTDbQQIUHW/ADzwyCQn5Yk28AkOoiEuDrJrCx2QPOG5E18BI+/OWyLYZjkHh2AmZ1 0LMgqr1i9erj3T3+cORkMgJ5fd4M3zE= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 3/7] memcg: make mod_memcg_state re-entrant safe against irqs Date: Tue, 13 May 2025 22:08:09 -0700 Message-ID: <20250514050813.2526843-4-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Let's make mod_memcg_state re-entrant safe against irqs. The only thing needed is to convert the usage of __this_cpu_add() to this_cpu_add(). In addition, with re-entrant safety, there is no need to disable irqs. mod_memcg_state() is not safe against nmi, so let's add warning if someone tries to call it in nmi context. 
Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 20 ++------------------ mm/memcontrol.c | 8 ++++---- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 308c01bf98f5..38a5d48400bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -905,19 +905,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct tas= k_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); =20 -void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, - int val); - /* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_memcg_state(memcg, idx, val); - local_irq_restore(flags); -} +void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val); =20 static inline void mod_memcg_page_state(struct page *page, enum memcg_stat_item idx, int val) @@ -1384,12 +1374,6 @@ static inline void mem_cgroup_print_oom_group(struct= mem_cgroup *memcg) { } =20 -static inline void __mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, - int nr) -{ -} - static inline void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int nr) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c8e0e1acd71..75616cd89aa1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -682,12 +682,12 @@ static int memcg_state_val_in_pages(int idx, int val) } =20 /** - * __mod_memcg_state - update cgroup memory statistics + * mod_memcg_state - update cgroup memory statistics * @memcg: the memory cgroup * @idx: the stat item - can be enum memcg_stat_item or enum node_stat_item * @val: delta to add to the counter, can be negative */ -void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, +void 
mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int val) { int i =3D memcg_stats_index(idx); @@ -701,7 +701,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum m= emcg_stat_item idx, =20 cpu =3D get_cpu(); =20 - __this_cpu_add(memcg->vmstats_percpu->state[i], val); + this_cpu_add(memcg->vmstats_percpu->state[i], val); val =3D memcg_state_val_in_pages(idx, val); memcg_rstat_updated(memcg, val, cpu); trace_mod_memcg_state(memcg, idx, val); @@ -2945,7 +2945,7 @@ static void drain_obj_stock(struct obj_stock_pcp *sto= ck) =20 memcg =3D get_mem_cgroup_from_objcg(old); =20 - __mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); + mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); memcg1_account_kmem(memcg, -nr_pages); if (!mem_cgroup_is_root(memcg)) memcg_uncharge(memcg, nr_pages); --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-182.mta0.migadu.com (out-182.mta0.migadu.com [91.218.175.182]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AE6F81F8BD6 for ; Wed, 14 May 2025 05:08:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.182 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199339; cv=none; b=Xmx8juk8I2E+9x7WChObQhhTAxDthGdpGFim41IIG8qUDte32LfRjudGjs0Ml9m71o1k3n65qJ+b2rBnyMLIymabT8nbb/KaB7QlswMg5En/tBhTLfsJOxKPo0pw7xtN6eGpEkuGYYYrt7K8jgYGM76+4p/EikDxzO+SBdFGEIM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199339; c=relaxed/simple; bh=/2eerQEC/naijXcmvzjoE05/UHwzf/HEwpK21ozoSSQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PYOYwsGlSYHuj+W7jtnEqUf432Hr5aG83PzMY2GnPjSQkcI3H96+zAtct4foL8xEuhpR1siNedzkizfIb4KmJjQjSApvMPb0PwbZKGLc4163lvHi2PnH47FLHk7i8JVelBv5zBj29lZ+/+sYsCNoHUe0t9PBIcB6ZAK+/4Eh7GU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none 
dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=PI+Wah95; arc=none smtp.client-ip=91.218.175.182 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="PI+Wah95" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199334; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=xC5RBxF4eBzFymaVkf9baProDnaZ4BeheVCZ1DfU/iY=; b=PI+Wah95f6MktFReQlC/4aD465VPjdqw+yos246Ps3NMi+oVXCoK7JlakLaMf5CyEDIpwI eXmnkviLfiN4yhHfA5YuLKcvFXKwbUTwMkmirDXq0FZX917m2gTcEoEUh5qQUIv7V8W7zN NWXaEq3w6rVYg/RyCkoClgSuMC10OXg= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 4/7] memcg: make count_memcg_events re-entrant safe against irqs Date: Tue, 13 May 2025 22:08:10 -0700 Message-ID: <20250514050813.2526843-5-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Let's make 
count_memcg_events re-entrant safe against irqs. The only thing needed is to convert the usage of __this_cpu_add() to this_cpu_add(). In addition, with re-entrant safety, there is no need to disable irqs. Also add warnings for in_nmi() as it is not safe against nmi context. Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- include/linux/memcontrol.h | 21 ++------------------- mm/memcontrol-v1.c | 6 +++--- mm/memcontrol.c | 6 +++--- mm/swap.c | 8 ++++---- mm/vmscan.c | 14 +++++++------- 5 files changed, 19 insertions(+), 36 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 38a5d48400bf..0a8336e8709f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -946,19 +946,8 @@ static inline void mod_lruvec_kmem_state(void *p, enum= node_stat_item idx, local_irq_restore(flags); } =20 -void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, - unsigned long count); - -static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ - unsigned long flags; - - local_irq_save(flags); - __count_memcg_events(memcg, idx, count); - local_irq_restore(flags); -} +void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, + unsigned long count); =20 static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) @@ -1432,12 +1421,6 @@ static inline void mod_lruvec_kmem_state(void *p, en= um node_stat_item idx, } =20 static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ -} - -static inline void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 3852f0713ad2..581c960ba19b 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -512,9 +512,9 @@ static void memcg1_charge_statistics(struct mem_cgroup = *memcg, int nr_pages) { /* 
pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) - __count_memcg_events(memcg, PGPGIN, 1); + count_memcg_events(memcg, PGPGIN, 1); else { - __count_memcg_events(memcg, PGPGOUT, 1); + count_memcg_events(memcg, PGPGOUT, 1); nr_pages =3D -nr_pages; /* for event */ } =20 @@ -689,7 +689,7 @@ void memcg1_uncharge_batch(struct mem_cgroup *memcg, un= signed long pgpgout, unsigned long flags; =20 local_irq_save(flags); - __count_memcg_events(memcg, PGPGOUT, pgpgout); + count_memcg_events(memcg, PGPGOUT, pgpgout); __this_cpu_add(memcg->events_percpu->nr_page_events, nr_memory); memcg1_check_events(memcg, nid); local_irq_restore(flags); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75616cd89aa1..b666cdb1af68 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -826,12 +826,12 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_= item idx, int val) } =20 /** - * __count_memcg_events - account VM events in a cgroup + * count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup * @idx: the event item * @count: the number of events that occurred */ -void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, +void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { int i =3D memcg_events_index(idx); @@ -845,7 +845,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enu= m vm_event_item idx, =20 cpu =3D get_cpu(); =20 - __this_cpu_add(memcg->vmstats_percpu->events[i], count); + this_cpu_add(memcg->vmstats_percpu->events[i], count); memcg_rstat_updated(memcg, count, cpu); trace_count_memcg_events(memcg, idx, count); =20 diff --git a/mm/swap.c b/mm/swap.c index 77b2d5997873..4fc322f7111a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -309,7 +309,7 @@ static void lru_activate(struct lruvec *lruvec, struct = folio *folio) trace_mm_lru_activate(folio); =20 __count_vm_events(PGACTIVATE, nr_pages); - __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages); + 
count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages); } =20 #ifdef CONFIG_SMP @@ -581,7 +581,7 @@ static void lru_deactivate_file(struct lruvec *lruvec, = struct folio *folio) =20 if (active) { __count_vm_events(PGDEACTIVATE, nr_pages); - __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, + count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } @@ -599,7 +599,7 @@ static void lru_deactivate(struct lruvec *lruvec, struc= t folio *folio) lruvec_add_folio(lruvec, folio); =20 __count_vm_events(PGDEACTIVATE, nr_pages); - __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); + count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } =20 static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio) @@ -625,7 +625,7 @@ static void lru_lazyfree(struct lruvec *lruvec, struct = folio *folio) lruvec_add_folio(lruvec, folio); =20 __count_vm_events(PGLAZYFREE, nr_pages); - __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages); + count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages); } =20 /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 0eda493fc383..f8dfd2864bbf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2037,7 +2037,7 @@ static unsigned long shrink_inactive_list(unsigned lo= ng nr_to_scan, item =3D PGSCAN_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_scanned); - __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); __count_vm_events(PGSCAN_ANON + file, nr_scanned); =20 spin_unlock_irq(&lruvec->lru_lock); @@ -2057,7 +2057,7 @@ static unsigned long shrink_inactive_list(unsigned lo= ng nr_to_scan, item =3D PGSTEAL_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); - __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); + count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); 
spin_unlock_irq(&lruvec->lru_lock); =20 @@ -2147,7 +2147,7 @@ static void shrink_active_list(unsigned long nr_to_sc= an, =20 if (!cgroup_reclaim(sc)) __count_vm_events(PGREFILL, nr_scanned); - __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); =20 spin_unlock_irq(&lruvec->lru_lock); =20 @@ -2204,7 +2204,7 @@ static void shrink_active_list(unsigned long nr_to_sc= an, nr_deactivate =3D move_folios_to_lru(lruvec, &l_inactive); =20 __count_vm_events(PGDEACTIVATE, nr_deactivate); - __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate); + count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate); =20 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&lruvec->lru_lock); @@ -4621,8 +4621,8 @@ static int scan_folios(struct lruvec *lruvec, struct = scan_control *sc, __count_vm_events(item, isolated); __count_vm_events(PGREFILL, sorted); } - __count_memcg_events(memcg, item, isolated); - __count_memcg_events(memcg, PGREFILL, sorted); + count_memcg_events(memcg, item, isolated); + count_memcg_events(memcg, PGREFILL, sorted); __count_vm_events(PGSCAN_ANON + type, isolated); trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH, scanned, skipped, isolated, @@ -4772,7 +4772,7 @@ static int evict_folios(struct lruvec *lruvec, struct= scan_control *sc, int swap item =3D PGSTEAL_KSWAPD + reclaimer_offset(sc); if (!cgroup_reclaim(sc)) __count_vm_events(item, reclaimed); - __count_memcg_events(memcg, item, reclaimed); + count_memcg_events(memcg, item, reclaimed); __count_vm_events(PGSTEAL_ANON + type, reclaimed); =20 spin_unlock_irq(&lruvec->lru_lock); --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-178.mta0.migadu.com (out-178.mta0.migadu.com [91.218.175.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with 
ESMTPS id 980B81FAC34 for ; Wed, 14 May 2025 05:08:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199341; cv=none; b=i093uohkuKSHpdiEdtcVbUyRIsl1Q72Yc2rr/BRA82WsyxIhhfPMCLgcD7uAAZjjUdBmRg6RtfbsxZ2AGAEN4t3L68xbGn1OGTUoUMRttmjpa8mnoU/q8m5J3R2XjotZWpMq2G/1Z+8nHYWunK3OlgXkDf8zJf6tzp1MKuiYjfg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199341; c=relaxed/simple; bh=9G93xRpalAPmzoB0gd9eGKyJD5r+wMSs2rOoBnThEB8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=hKqfJ6qH4S2BuJz1Rertdar1F1EA6fPyEumA3XFir46s+2S6atj1y/NTOEx0/4UpUUdiS9yTLVdyCS9M6EBaKk+dCQ2U5WExWK0h1M3jfw2wL6YSyGQpvpOnYFepTOmlod203qNYXrB9asK8H7IIJaJGen4G1EYevZCXXe9UUDM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=N+woL7ya; arc=none smtp.client-ip=91.218.175.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="N+woL7ya" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199337; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=lcPht+GPMt0yev/Qh9l4mZlCLB+ximZHBdS6AfFo5Is=; b=N+woL7yakbLXHSkg0D4+onHGELWFvbVkocWEWivtqfAyf19m78VaP0Yf1jF+i4mnw2rT3B LmklWcglrPX5Hm1MXJWgcDrgHkpVlnBNps6xpadkyAFwHtii03rmxln6RdVdJY2eABDiVa HLo3+j2gbHWT9rgePUcF8a2qCYciHJM= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 5/7] memcg: make __mod_memcg_lruvec_state re-entrant safe against irqs Date: Tue, 13 May 2025 22:08:11 -0700 Message-ID: <20250514050813.2526843-6-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Let's make __mod_memcg_lruvec_state re-entrant safe and name it mod_memcg_lruvec_state(). The only thing needed is to convert the usage of __this_cpu_add() to this_cpu_add(). There are two callers of mod_memcg_lruvec_state() and one of them i.e. __mod_objcg_mlstate() will be re-entrant safe as well, so, rename it mod_objcg_mlstate(). The last caller __mod_lruvec_state() still calls __mod_node_page_state() which is not re-entrant safe yet, so keep it as is. 
Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- mm/memcontrol.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b666cdb1af68..4f19fe9de5bf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -728,7 +728,7 @@ unsigned long memcg_page_state_local(struct mem_cgroup = *memcg, int idx) } #endif =20 -static void __mod_memcg_lruvec_state(struct lruvec *lruvec, +static void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { @@ -746,10 +746,10 @@ static void __mod_memcg_lruvec_state(struct lruvec *l= ruvec, cpu =3D get_cpu(); =20 /* Update memcg */ - __this_cpu_add(memcg->vmstats_percpu->state[i], val); + this_cpu_add(memcg->vmstats_percpu->state[i], val); =20 /* Update lruvec */ - __this_cpu_add(pn->lruvec_stats_percpu->state[i], val); + this_cpu_add(pn->lruvec_stats_percpu->state[i], val); =20 val =3D memcg_state_val_in_pages(idx, val); memcg_rstat_updated(memcg, val, cpu); @@ -776,7 +776,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum nod= e_stat_item idx, =20 /* Update memcg and lruvec */ if (!mem_cgroup_disabled()) - __mod_memcg_lruvec_state(lruvec, idx, val); + mod_memcg_lruvec_state(lruvec, idx, val); } =20 void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, @@ -2552,7 +2552,7 @@ static void commit_charge(struct folio *folio, struct= mem_cgroup *memcg) folio->memcg_data =3D (unsigned long)memcg; } =20 -static inline void __mod_objcg_mlstate(struct obj_cgroup *objcg, +static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr) { @@ -2562,7 +2562,7 @@ static inline void __mod_objcg_mlstate(struct obj_cgr= oup *objcg, rcu_read_lock(); memcg =3D obj_cgroup_memcg(objcg); lruvec =3D mem_cgroup_lruvec(memcg, pgdat); - __mod_memcg_lruvec_state(lruvec, idx, nr); + mod_memcg_lruvec_state(lruvec, idx, nr); rcu_read_unlock(); } =20 @@ -2872,12 +2872,12 @@ static 
void __account_obj_stock(struct obj_cgroup *= objcg, struct pglist_data *oldpg =3D stock->cached_pgdat; =20 if (stock->nr_slab_reclaimable_b) { - __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b =3D 0; } if (stock->nr_slab_unreclaimable_b) { - __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b =3D 0; } @@ -2903,7 +2903,7 @@ static void __account_obj_stock(struct obj_cgroup *ob= jcg, } } if (nr) - __mod_objcg_mlstate(objcg, pgdat, idx, nr); + mod_objcg_mlstate(objcg, pgdat, idx, nr); } =20 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_by= tes, @@ -2972,13 +2972,13 @@ static void drain_obj_stock(struct obj_stock_pcp *s= tock) */ if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) { if (stock->nr_slab_reclaimable_b) { - __mod_objcg_mlstate(old, stock->cached_pgdat, + mod_objcg_mlstate(old, stock->cached_pgdat, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b =3D 0; } if (stock->nr_slab_unreclaimable_b) { - __mod_objcg_mlstate(old, stock->cached_pgdat, + mod_objcg_mlstate(old, stock->cached_pgdat, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b =3D 0; --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-180.mta0.migadu.com (out-180.mta0.migadu.com [91.218.175.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 79A741FC7C5 for ; Wed, 14 May 2025 05:09:02 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=91.218.175.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199344; cv=none; 
b=gqYMVnX2l7OHq9WHomZ6v6sva/cDS3Y6HrpLWPx9do5+WUHpG0y4D/HpVjl6vyRL8mvMcJsicuWTFSS6Z5cT4co6FZ96dNQdHB9LYfr3fjEldQ4o3vKCfN6nA0ok/92ekwyipDVp2UBBu/XR2D3esMQHkc416jtp9OER8YDgig0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199344; c=relaxed/simple; bh=SEJ3LGQS31qJB107TFfmG+4J9z28S29RJDM8tdnS1nU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=pJywkMR5l9sLhLMERMHrU5jhZ9Vk1CytVGQmg41cxjY7TYdkztR99yLptYh16nSOAbtDcM1XtTfU8812v1qlk6fvMPwwcw4r3SaYYFlY985jFjKC+AR84Dlju5LwdBhmC2XBDrTe12pkUHCY4zJMx/JjDidOXTws0Y3UNmKcyTI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=C+de8KGJ; arc=none smtp.client-ip=91.218.175.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="C+de8KGJ" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199340; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=0h+SN0UhJ+eLym9lbXu/RGEYZMj+86zExOIBnbok3mA=; b=C+de8KGJN9lp4gQRHrHBI4K2deILhvh8eDjt9b1qW70ETe92jGJ2G5ft+1/TLBJgvvo4Sd 3pF9fDgCGR+Z/uYwP3bFGBQuZQlsqVAZzIuwh1FO4zcThO9kM6mtyAhaDxufaU23KbGCk/ DQHrlLHfhbKtrkK0/btvhoPSRjJlSEQ= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 6/7] memcg: no stock lock for cpu hot-unplug Date: Tue, 13 May 2025 22:08:12 -0700 Message-ID: <20250514050813.2526843-7-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Previously on the cpu hot-unplug, the kernel would call drain_obj_stock() with objcg local lock. However local lock was not needed as the stock which was accessed belongs to a dead cpu but we kept it there to disable irqs as drain_obj_stock() may call mod_objcg_mlstate() which required irqs disabled. However there is no need to disable irqs now for mod_objcg_mlstate(), so we can remove the local lock altogether from cpu hot-unplug path. 
Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- mm/memcontrol.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4f19fe9de5bf..78a41378b8f3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2050,17 +2050,8 @@ void drain_all_stock(struct mem_cgroup *root_memcg) =20 static int memcg_hotplug_cpu_dead(unsigned int cpu) { - struct obj_stock_pcp *obj_st; - unsigned long flags; - - obj_st =3D &per_cpu(obj_stock, cpu); - - /* drain_obj_stock requires objstock.lock */ - local_lock_irqsave(&obj_stock.lock, flags); - drain_obj_stock(obj_st); - local_unlock_irqrestore(&obj_stock.lock, flags); - /* no need for the local lock */ + drain_obj_stock(&per_cpu(obj_stock, cpu)); drain_stock_fully(&per_cpu(memcg_stock, cpu)); =20 return 0; --=20 2.47.1 From nobody Sun Feb 8 01:29:55 2026 Received: from out-173.mta1.migadu.com (out-173.mta1.migadu.com [95.215.58.173]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 63DF31F540F for ; Wed, 14 May 2025 05:09:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.173 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199350; cv=none; b=bkLKQgeycRxp/FWp3Vl8nZf4BFqaXScG/yUnI2tLgIyHs9oga3rLr9PmpbmC5op1JwSywHOhTOsQ7REfDE6Vt7Mos3cmJcMHZNwbZDzaZLVGPggCp4mNwcPJ4il9oD7dxQoShrW+SAqNTzEGeH/INxA9NkW0x3eDxdje6DyZBAs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1747199350; c=relaxed/simple; bh=cMXg1Zoafpf6T/Bl4D/YqFLGcqsmHolvDIggz69J7Jg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=VtTtDQOnt/fwUEm3l3qTp0M5VjaYpXgcwAtcB5ROWNnln/vVSAhgzQ3MIIbHcF9VMJJnL7L8V5ONNkkKfTl/IkKwFVwAS4G2A89G3wuqpSa2bFVUhEE0zWNF3RvAKUlKySavJlCqp0N8aQkddc66WMpQAgw2ZAleFPEvRxlX2a8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass 
(p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=BB6zKS6f; arc=none smtp.client-ip=95.215.58.173 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="BB6zKS6f" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1747199346; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=NuiqYU5xQUJzlD4OF0DY6DzP++awncv0NldqTEFbrbk=; b=BB6zKS6fZt3IpK/PR2CCdNoGUYOkfDyeW0jJMjAtx0JRxkzb3wR1CwWeIyeUXF6F8CVPyQ RIxNVBrlQPoCKw9bPu6nsPBuDBXncIYIHIJ/NBJu2AQ/6VwHqfxddK4NDXmwlv6OuYPqAu 3X6ncWadCJ7SIUgeaoUy8vmHHxLLaVg= From: Shakeel Butt To: Andrew Morton Cc: Johannes Weiner , Michal Hocko , Roman Gushchin , Muchun Song , Vlastimil Babka , Alexei Starovoitov , Sebastian Andrzej Siewior , Harry Yoo , Yosry Ahmed , bpf@vger.kernel.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Meta kernel team Subject: [PATCH 7/7] memcg: objcg stock trylock without irq disabling Date: Tue, 13 May 2025 22:08:13 -0700 Message-ID: <20250514050813.2526843-8-shakeel.butt@linux.dev> In-Reply-To: <20250514050813.2526843-1-shakeel.butt@linux.dev> References: <20250514050813.2526843-1-shakeel.butt@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" There is no need to disable 
irqs to use objcg per-cpu stock, so let's just not do that but consume_obj_stock() and refill_obj_stock() will need to use trylock instead to avoid deadlock against irq. One consequence of this change is that the charge request from irq context may take slowpath more often but it should be rare. Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka --- mm/memcontrol.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 78a41378b8f3..73b19137901a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1907,18 +1907,17 @@ static void drain_local_memcg_stock(struct work_str= uct *dummy) static void drain_local_obj_stock(struct work_struct *dummy) { struct obj_stock_pcp *stock; - unsigned long flags; =20 if (WARN_ONCE(!in_task(), "drain in non-task context")) return; =20 - local_lock_irqsave(&obj_stock.lock, flags); + local_lock(&obj_stock.lock); =20 stock =3D this_cpu_ptr(&obj_stock); drain_obj_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); =20 - local_unlock_irqrestore(&obj_stock.lock, flags); + local_unlock(&obj_stock.lock); } =20 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) @@ -2901,10 +2900,10 @@ static bool consume_obj_stock(struct obj_cgroup *ob= jcg, unsigned int nr_bytes, struct pglist_data *pgdat, enum node_stat_item idx) { struct obj_stock_pcp *stock; - unsigned long flags; bool ret =3D false; =20 - local_lock_irqsave(&obj_stock.lock, flags); + if (!local_trylock(&obj_stock.lock)) + return ret; =20 stock =3D this_cpu_ptr(&obj_stock); if (objcg =3D=3D READ_ONCE(stock->cached_objcg) && stock->nr_bytes >=3D n= r_bytes) { @@ -2915,7 +2914,7 @@ static bool consume_obj_stock(struct obj_cgroup *objc= g, unsigned int nr_bytes, __account_obj_stock(objcg, stock, nr_bytes, pgdat, idx); } =20 - local_unlock_irqrestore(&obj_stock.lock, flags); + local_unlock(&obj_stock.lock); =20 return ret; } @@ -3004,10 +3003,16 @@ static void 
refill_obj_stock(struct obj_cgroup *obj= cg, unsigned int nr_bytes, enum node_stat_item idx) { struct obj_stock_pcp *stock; - unsigned long flags; unsigned int nr_pages =3D 0; =20 - local_lock_irqsave(&obj_stock.lock, flags); + if (!local_trylock(&obj_stock.lock)) { + if (pgdat) + mod_objcg_mlstate(objcg, pgdat, idx, nr_bytes); + nr_pages =3D nr_bytes >> PAGE_SHIFT; + nr_bytes =3D nr_bytes & (PAGE_SIZE - 1); + atomic_add(nr_bytes, &objcg->nr_charged_bytes); + goto out; + } =20 stock =3D this_cpu_ptr(&obj_stock); if (READ_ONCE(stock->cached_objcg) !=3D objcg) { /* reset if necessary */ @@ -3029,8 +3034,8 @@ static void refill_obj_stock(struct obj_cgroup *objcg= , unsigned int nr_bytes, stock->nr_bytes &=3D (PAGE_SIZE - 1); } =20 - local_unlock_irqrestore(&obj_stock.lock, flags); - + local_unlock(&obj_stock.lock); +out: if (nr_pages) obj_cgroup_uncharge_pages(objcg, nr_pages); } --=20 2.47.1